invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.7-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- invarlock/__init__.py +2 -2
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/certify.py +600 -59
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +907 -183
- invarlock/cli/commands/verify.py +76 -11
- invarlock/cli/config.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/runner.py +111 -25
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -3
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +48 -27
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +159 -9
- invarlock/reporting/certificate_schema.py +1 -1
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +7 -0
- invarlock/reporting/render.py +791 -431
- invarlock/reporting/report.py +39 -3
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/reporting/render.py
CHANGED
@@ -113,6 +113,462 @@ def _short_digest(v: str) -> str:
     return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
 
 
+def _render_executive_dashboard(cert: dict[str, Any]) -> str:
+    """Render executive summary dashboard table."""
+    lines: list[str] = []
+    _append_safety_dashboard_section(lines, cert)
+    return "\n".join(lines).rstrip()
+
+
+def _append_safety_dashboard_section(
+    lines: list[str], certificate: dict[str, Any]
+) -> None:
+    """Append a concise, first-screen dashboard for the certificate."""
+    block = compute_console_validation_block(certificate)
+    overall_pass = bool(block.get("overall_pass"))
+    overall_status = (
+        f"{'✅' if overall_pass else '❌'} {'PASS' if overall_pass else 'FAIL'}"
+    )
+
+    validation = certificate.get("validation", {}) or {}
+    pm = certificate.get("primary_metric", {}) or {}
+    auto = certificate.get("auto", {}) or {}
+    tier = str(auto.get("tier") or "balanced").lower()
+
+    # Primary metric summary
+    pm_kind = str(pm.get("kind", "")).lower()
+    pm_basis = pm.get("gating_basis") or pm.get("basis") or "point"
+    pm_ok: bool | None
+    if isinstance(validation, dict) and "primary_metric_acceptable" in validation:
+        pm_ok = bool(validation.get("primary_metric_acceptable"))
+    else:
+        pm_ok = None
+    pm_value = pm.get("ratio_vs_baseline")
+
+    if pm_kind in {"accuracy", "vqa_accuracy"}:
+        measured = f"{pm_value:+.2f} pp" if isinstance(pm_value, int | float) else "N/A"
+        th_map = {
+            "conservative": -0.5,
+            "balanced": -1.0,
+            "aggressive": -2.0,
+            "none": -1.0,
+        }
+        th = th_map.get(tier, -1.0)
+        threshold = f"≥ {th:+.2f} pp ({pm_basis})"
+    else:
+        measured = f"{pm_value:.3f}×" if isinstance(pm_value, int | float) else "N/A"
+        tier_thresholds = {
+            "conservative": 1.05,
+            "balanced": 1.10,
+            "aggressive": 1.20,
+            "none": 1.10,
+        }
+        ratio_limit = tier_thresholds.get(tier, 1.10)
+        target_ratio = auto.get("target_pm_ratio")
+        if isinstance(target_ratio, int | float) and target_ratio > 0:
+            ratio_limit = min(ratio_limit, float(target_ratio))
+        threshold = f"≤ {ratio_limit:.2f}× ({pm_basis})"
+
+    pm_status = (
+        f"{'✅' if pm_ok else '❌'} {measured}"
+        if isinstance(pm_ok, bool)
+        else f"🛈 {measured}"
+    )
+
+    # Drift summary (final/preview ratio) when preview/final are numeric
+    drift_ok: bool | None
+    if isinstance(validation, dict) and "preview_final_drift_acceptable" in validation:
+        drift_ok = bool(validation.get("preview_final_drift_acceptable"))
+    else:
+        drift_ok = None
+    drift_val = "N/A"
+    try:
+        pv = (
+            float(pm.get("preview"))
+            if isinstance(pm.get("preview"), int | float)
+            else float("nan")
+        )
+        fv = (
+            float(pm.get("final"))
+            if isinstance(pm.get("final"), int | float)
+            else float("nan")
+        )
+        drift = (
+            fv / pv
+            if (math.isfinite(pv) and pv > 0 and math.isfinite(fv))
+            else float("nan")
+        )
+        if math.isfinite(drift):
+            drift_val = f"{drift:.3f}×"
+    except Exception:
+        drift_val = "N/A"
+    drift_status = (
+        f"{'✅' if drift_ok else '❌'} {drift_val}"
+        if isinstance(drift_ok, bool)
+        else f"🛈 {drift_val}"
+    )
+
+    def _gate_cell(key: str, ok_default: bool | None = None) -> str:
+        ok: bool | None
+        if not isinstance(validation, dict):
+            ok = ok_default
+        elif key not in validation:
+            ok = ok_default
+        else:
+            ok = bool(validation.get(key))
+        if ok is None:
+            return "🛈 N/A"
+        return "✅ PASS" if ok else "❌ FAIL"
+
+    overhead_ctx = certificate.get("guard_overhead", {}) or {}
+    overhead_evaluated = (
+        bool(overhead_ctx.get("evaluated")) if isinstance(overhead_ctx, dict) else False
+    )
+    overhead_row: tuple[str, str, str] | None = None
+    if overhead_evaluated:
+        overhead_pct = overhead_ctx.get("overhead_percent")
+        overhead_ratio = overhead_ctx.get("overhead_ratio")
+        if isinstance(overhead_pct, int | float) and math.isfinite(float(overhead_pct)):
+            overhead_measured = f"{float(overhead_pct):+.2f}%"
+        elif isinstance(overhead_ratio, int | float) and math.isfinite(
+            float(overhead_ratio)
+        ):
+            overhead_measured = f"{float(overhead_ratio):.3f}×"
+        else:
+            overhead_measured = "N/A"
+        threshold_pct = overhead_ctx.get("threshold_percent")
+        if isinstance(threshold_pct, int | float) and math.isfinite(
+            float(threshold_pct)
+        ):
+            threshold_str = f"≤ +{float(threshold_pct):.1f}%"
+        else:
+            threshold_str = "≤ +1.0%"
+        overhead_row = (
+            "Overhead",
+            f"{'✅' if bool(validation.get('guard_overhead_acceptable', True)) else '❌'} {overhead_measured}"
+            if isinstance(validation, dict)
+            else f"🛈 {overhead_measured}",
+            threshold_str,
+        )
+
+    lines.append("## Safety Dashboard")
+    lines.append("")
+    lines.append("| Check | Status | Quick Summary |")
+    lines.append("|-------|--------|---------------|")
+    lines.append(f"| Overall | {overall_status} | Canonical gate outcomes |")
+    lines.append(f"| Primary Metric | {pm_status} | {threshold} |")
+    lines.append(f"| Drift | {drift_status} | 0.95–1.05× band |")
+    lines.append(
+        f"| Invariants | {_gate_cell('invariants_pass')} | Model integrity checks |"
+    )
+    lines.append(
+        f"| Spectral | {_gate_cell('spectral_stable')} | Weight matrix spectral norms |"
+    )
+    lines.append(f"| RMT | {_gate_cell('rmt_stable')} | Random Matrix Theory guard |")
+    if overhead_row:
+        lines.append(f"| {overhead_row[0]} | {overhead_row[1]} | {overhead_row[2]} |")
+    lines.append("")
+
+
+def _append_primary_metric_section(
+    lines: list[str], certificate: dict[str, Any]
+) -> None:
+    """Append the Primary Metric section early for quick triage."""
+    pm = certificate.get("primary_metric")
+    if not isinstance(pm, dict) or not pm:
+        return
+
+    kind = pm.get("kind", "unknown")
+    lines.append("## Primary Metric")
+    lines.append("")
+    unit = pm.get("unit", "-")
+    paired = pm.get("paired", False)
+
+    estimated_flag = False
+    try:
+        if bool(pm.get("estimated")):
+            estimated_flag = True
+        elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
+            estimated_flag = True
+    except Exception:
+        estimated_flag = False
+    est_suffix = " (estimated)" if estimated_flag else ""
+
+    lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
+    gating_basis = pm.get("gating_basis") or pm.get("basis")
+    if gating_basis:
+        lines.append(f"- Basis: {gating_basis}")
+    if isinstance(paired, bool):
+        lines.append(f"- Paired: {paired}")
+    reps = pm.get("reps")
+    if isinstance(reps, int | float):
+        lines.append(f"- Bootstrap Reps: {int(reps)}")
+    ci = pm.get("ci") or pm.get("display_ci")
+    if (
+        isinstance(ci, list | tuple)
+        and len(ci) == 2
+        and all(isinstance(x, int | float) for x in ci)
+    ):
+        lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
+
+    prev = pm.get("preview")
+    fin = pm.get("final")
+    ratio = pm.get("ratio_vs_baseline")
+
+    lines.append("")
+    if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
+        lines.append(
+            "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
+        )
+    lines.append("| Field | Value |")
+    lines.append("|-------|-------|")
+    lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
+    lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
+
+    if kind in {"accuracy", "vqa_accuracy"}:
+        lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
+        try:
+            base_pt = pm.get("baseline_point")
+            if isinstance(base_pt, int | float) and base_pt < 0.05:
+                lines.append("- Note: baseline < 5%; ratio suppressed; showing Δpp")
+        except Exception:
+            pass
+    else:
+        try:
+            lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
+        except Exception:
+            lines.append("| Ratio vs Baseline | N/A |")
+    lines.append("")
+
+    # Secondary metrics (informational)
+    try:
+        secs = certificate.get("secondary_metrics")
+        if isinstance(secs, list) and secs:
+            lines.append("## Secondary Metrics (informational)")
+            lines.append("")
+            lines.append("| Kind | Preview | Final | vs Baseline | CI |")
+            lines.append("|------|---------|-------|-------------|----|")
+            for m in secs:
+                if not isinstance(m, dict):
+                    continue
+                k = m.get("kind", "?")
+                pv = _fmt_by_kind(m.get("preview"), str(k))
+                fv = _fmt_by_kind(m.get("final"), str(k))
+                rb = m.get("ratio_vs_baseline")
+                try:
+                    rb_str = (
+                        f"{float(rb):.3f}"
+                        if (str(k).startswith("ppl"))
+                        else _fmt_by_kind(rb, str(k))
+                    )
+                except Exception:
+                    rb_str = "N/A"
+                ci = m.get("display_ci") or m.get("ci")
+                if isinstance(ci, tuple | list) and len(ci) == 2:
+                    ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
+                else:
+                    ci_str = "–"
+                lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
+            lines.append("")
+    except Exception:
+        pass
+
+
+def _append_policy_configuration_section(
+    lines: list[str], certificate: dict[str, Any]
+) -> None:
+    resolved_policy = certificate.get("resolved_policy")
+    policy_provenance = certificate.get("policy_provenance", {}) or {}
+    has_prov = isinstance(policy_provenance, dict) and bool(policy_provenance)
+    has_resolved = isinstance(resolved_policy, dict) and bool(resolved_policy)
+    if not (has_prov or has_resolved):
+        return
+
+    lines.append("## Policy Configuration")
+    lines.append("")
+
+    tier = None
+    if has_prov:
+        tier = policy_provenance.get("tier")
+    if not tier:
+        tier = (certificate.get("auto", {}) or {}).get("tier")
+    digest_value = None
+    if has_prov:
+        digest_value = policy_provenance.get("policy_digest")
+    if not digest_value:
+        digest_value = (certificate.get("policy_digest", {}) or {}).get(
+            "thresholds_hash"
+        )
+
+    summary_parts: list[str] = []
+    if tier:
+        summary_parts.append(f"**Tier:** {tier}")
+    if digest_value:
+        summary_parts.append(f"**Digest:** `{_short_digest(str(digest_value))}`")
+    if summary_parts:
+        lines.append(" | ".join(summary_parts))
+
+    if has_prov:
+        overrides_list = policy_provenance.get("overrides") or []
+        if overrides_list:
+            lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
+        else:
+            lines.append("- **Overrides:** (none)")
+        if policy_provenance.get("resolved_at"):
+            lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
+
+    if has_resolved:
+        lines.append("")
+        lines.append("<details>")
+        lines.append("<summary>Resolved Policy YAML</summary>")
+        lines.append("")
+        lines.append("```yaml")
+        resolved_yaml = yaml.safe_dump(
+            resolved_policy, sort_keys=True, width=80, default_flow_style=False
+        ).strip()
+        for line in resolved_yaml.splitlines():
+            lines.append(line)
+        lines.append("```")
+        lines.append("")
+        lines.append("</details>")
+
+    lines.append("")
+
+
+def _append_dataset_and_provenance_section(
+    lines: list[str], certificate: dict[str, Any]
+) -> None:
+    dataset = certificate.get("dataset", {}) or {}
+    provenance_info = certificate.get("provenance", {}) or {}
+
+    has_dataset = isinstance(dataset, dict) and bool(dataset)
+    has_provenance = isinstance(provenance_info, dict) and bool(provenance_info)
+    if not (has_dataset or has_provenance):
+        return
+
+    lines.append("## Dataset and Provenance")
+    lines.append("")
+
+    if has_dataset:
+        prov = dataset.get("provider") or "unknown"
+        lines.append(f"- **Provider:** {prov}")
+        try:
+            seq_len_val = (
+                int(dataset.get("seq_len"))
+                if isinstance(dataset.get("seq_len"), int | float)
+                else dataset.get("seq_len")
+            )
+        except Exception: # pragma: no cover - defensive
+            seq_len_val = dataset.get("seq_len")
+        if seq_len_val is not None:
+            lines.append(f"- **Sequence Length:** {seq_len_val}")
+        windows_blk = (
+            dataset.get("windows", {})
+            if isinstance(dataset.get("windows"), dict)
+            else {}
+        )
+        win_prev = windows_blk.get("preview")
+        win_final = windows_blk.get("final")
+        if win_prev is not None and win_final is not None:
+            lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
+        if windows_blk.get("seed") is not None:
+            lines.append(f"- **Seed:** {windows_blk.get('seed')}")
+        hash_blk = (
+            dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
+        )
+        if hash_blk.get("preview_tokens") is not None:
+            lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
+        if hash_blk.get("final_tokens") is not None:
+            lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
+        if hash_blk.get("total_tokens") is not None:
+            lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
+        if hash_blk.get("dataset"):
+            lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
+        tokenizer = dataset.get("tokenizer", {})
+        if isinstance(tokenizer, dict) and (
+            tokenizer.get("name") or tokenizer.get("hash")
+        ):
+            vocab_size = tokenizer.get("vocab_size")
+            vocab_suffix = (
+                f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
+            )
+            lines.append(
+                f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
+            )
+            if tokenizer.get("hash"):
+                lines.append(f" - Hash: {tokenizer['hash']}")
+            lines.append(
+                f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
+            )
+            if tokenizer.get("pad_token") is not None:
+                lines.append(f" - PAD: {tokenizer.get('pad_token')}")
+            if tokenizer.get("add_prefix_space") is not None:
+                lines.append(
+                    f" - add_prefix_space: {tokenizer.get('add_prefix_space')}"
+                )
+
+    if has_provenance:
+        baseline_info = provenance_info.get("baseline", {}) or {}
+        edited_info = provenance_info.get("edited", {}) or {}
+
+        if baseline_info or edited_info:
+            lines.append("")
+        if baseline_info:
+            lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
+            if baseline_info.get("report_hash"):
+                lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
+            if baseline_info.get("report_path"):
+                lines.append(f" - Report Path: {baseline_info.get('report_path')}")
+        if edited_info:
+            lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
+            if edited_info.get("report_hash"):
+                lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
+            if edited_info.get("report_path"):
+                lines.append(f" - Report Path: {edited_info.get('report_path')}")
+
+        provider_digest = provenance_info.get("provider_digest")
+        if isinstance(provider_digest, dict) and provider_digest:
+            ids_d = provider_digest.get("ids_sha256")
+            tok_d = provider_digest.get("tokenizer_sha256")
+            mask_d = provider_digest.get("masking_sha256")
+
+            lines.append("- **Provider Digest:**")
+            if tok_d:
+                lines.append(
+                    f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
+                )
+            if ids_d:
+                lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
+            if mask_d:
+                lines.append(
+                    f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
+                )
+
+    try:
+        conf = certificate.get("confidence", {}) or {}
+        if isinstance(conf, dict) and conf.get("label"):
+            lines.append(f"- **Confidence:** {conf.get('label')}")
+    except Exception:
+        pass
+
+    try:
+        pd = certificate.get("policy_digest", {}) or {}
+        if isinstance(pd, dict) and pd:
+            pv = pd.get("policy_version")
+            th = pd.get("thresholds_hash")
+            if pv:
+                lines.append(f"- **Policy Version:** {pv}")
+            if isinstance(th, str) and th:
+                short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
+                lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
+            if pd.get("changed"):
+                lines.append("- Note: policy changed")
+    except Exception:
+        pass
+
+    lines.append("")
+
+
 def _fmt_by_kind(x: Any, k: str) -> str:
     try:
         xv = float(x)
@@ -275,11 +731,12 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     if not validate_certificate(certificate):
         raise ValueError("Invalid certificate structure")
 
-    lines = []
+    lines: list[str] = []
+    appendix_lines: list[str] = []
     edit_name = str(certificate.get("edit_name") or "").lower()
 
     # Header
-    lines.append("# InvarLock
+    lines.append("# InvarLock Evaluation Certificate")
    lines.append("")
     lines.append(
         "> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
@@ -291,6 +748,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
     lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
     lines.append("")
+    lines.append(
+        "> Full evidence: see [`evaluation.cert.json`](evaluation.cert.json) for complete provenance, digests, and raw measurements."
+    )
+    lines.append("")
 
     plugins = certificate.get("plugins", {})
     if isinstance(plugins, dict) and plugins:
@@ -314,7 +775,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         ]
         if guard_entries:
             lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
-
+        lines.append("")
 
     # Executive Summary with validation status (canonical, from console block)
     lines.append("## Executive Summary")
@@ -354,6 +815,25 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         pass
     lines.append("")
 
+    dashboard = _render_executive_dashboard(certificate)
+    if dashboard:
+        lines.extend(dashboard.splitlines())
+        lines.append("")
+
+    lines.append("## Contents")
+    lines.append("")
+    lines.append("- [Safety Dashboard](#safety-dashboard)")
+    lines.append("- [Quality Gates](#quality-gates)")
+    lines.append("- [Safety Check Details](#safety-check-details)")
+    lines.append("- [Primary Metric](#primary-metric)")
+    lines.append("- [Guard Observability](#guard-observability)")
+    lines.append("- [Model Information](#model-information)")
+    lines.append("- [Dataset and Provenance](#dataset-and-provenance)")
+    lines.append("- [Policy Configuration](#policy-configuration)")
+    lines.append("- [Appendix](#appendix)")
+    lines.append("- [Certificate Integrity](#certificate-integrity)")
+    lines.append("")
+
     # Validation table with canonical gates (mirrors console allow-list)
     lines.append("## Quality Gates")
     lines.append("")
@@ -410,6 +890,31 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
             )
         )
     status = "✅ PASS" if ok else "❌ FAIL"
+    drift_min = 0.95
+    drift_max = 1.05
+    try:
+        drift_band = (
+            pm_block.get("drift_band") if isinstance(pm_block, dict) else None
+        )
+        if isinstance(drift_band, dict):
+            lo = drift_band.get("min")
+            hi = drift_band.get("max")
+            if isinstance(lo, int | float) and isinstance(hi, int | float):
+                lo_f = float(lo)
+                hi_f = float(hi)
+                if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
+                    drift_min = lo_f
+                    drift_max = hi_f
+        elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
+            lo_raw, hi_raw = drift_band[0], drift_band[1]
+            if isinstance(lo_raw, int | float) and isinstance(hi_raw, int | float):
+                lo_f = float(lo_raw)
+                hi_f = float(hi_raw)
+                if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
+                    drift_min = lo_f
+                    drift_max = hi_f
+    except Exception:
+        pass
     # Compute drift from PM preview/final when available
     try:
         pv = (
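The hunk above lets a policy override the default 0.95–1.05 drift band, accepting either a `{"min": ..., "max": ...}` mapping or a two-element sequence and silently keeping the default when the override is malformed. A standalone sketch of that tolerant parse (the function name is illustrative):

```python
import math

def resolve_drift_band(raw: object,
                       default: tuple[float, float] = (0.95, 1.05)) -> tuple[float, float]:
    """Accept {'min': lo, 'max': hi} or a (lo, hi) pair; keep the default otherwise."""
    if isinstance(raw, dict):
        lo, hi = raw.get("min"), raw.get("max")
    elif isinstance(raw, (list, tuple)) and len(raw) == 2:
        lo, hi = raw
    else:
        return default
    if isinstance(lo, (int, float)) and isinstance(hi, (int, float)):
        lo_f, hi_f = float(lo), float(hi)
        # A usable band must be finite, positive, and properly ordered.
        if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
            return (lo_f, hi_f)
    return default

assert resolve_drift_band({"min": 0.9, "max": 1.1}) == (0.9, 1.1)
assert resolve_drift_band([1.1, 0.9]) == (0.95, 1.05)  # inverted band is rejected
```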
@@ -430,8 +935,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     except Exception:
         drift = float("nan")
     measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
+    band_label = f"{drift_min:.2f}–{drift_max:.2f}x"
     lines.append(
-        f"| Preview Final Drift Acceptable | {status} | {measured} |
+        f"| Preview Final Drift Acceptable | {status} | {measured} | {band_label} | point | Final/Preview ratio stability |"
     )
 
     # Helper to emit Guard Overhead Acceptable row (only when evaluated)
@@ -616,14 +1122,39 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
             or overlap_frac is not None
         ):
             lines.append("")
-
-
-
+            parts: list[str] = []
+            if paired_windows is not None:
+                try:
+                    parts.append(f"{int(paired_windows)} windows")
+                except Exception:
+                    parts.append(f"windows={paired_windows}")
+            if isinstance(match_frac, int | float) and math.isfinite(float(match_frac)):
+                parts.append(f"{float(match_frac) * 100.0:.1f}% match")
+            elif match_frac is not None:
+                parts.append(f"match={match_frac}")
+            if isinstance(overlap_frac, int | float) and math.isfinite(
+                float(overlap_frac)
+            ):
+                parts.append(f"{float(overlap_frac) * 100.0:.1f}% overlap")
+            elif overlap_frac is not None:
+                parts.append(f"overlap={overlap_frac}")
+            lines.append(f"✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
         if isinstance(bootstrap, dict):
             reps = bootstrap.get("replicates")
             bseed = bootstrap.get("seed")
             if reps is not None or bseed is not None:
-
+                bits: list[str] = []
+                if reps is not None:
+                    try:
+                        bits.append(f"{int(reps)} replicates")
+                    except Exception:
+                        bits.append(f"replicates={reps}")
+                if bseed is not None:
+                    try:
+                        bits.append(f"seed={int(bseed)}")
+                    except Exception:
+                        bits.append(f"seed={bseed}")
+                lines.append(f"✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
         # Optional: show log-space paired Δ CI next to ratio CI for clarity
         delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
             "ppl", {}
@@ -633,7 +1164,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
             and len(delta_ci) == 2
             and all(isinstance(x, int | float) for x in delta_ci)
         ):
-            lines.append(f"
+            lines.append(f"🛈 Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
     except Exception:
         pass
 
@@ -654,116 +1185,179 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
 
     lines.append("")
 
+    _append_primary_metric_section(lines, certificate)
+
     # Guard observability snapshots
     lines.append("## Guard Observability")
     lines.append("")
 
     spectral_info = certificate.get("spectral", {}) or {}
     if spectral_info:
-        lines.append("### Spectral Guard")
+        lines.append("### Spectral Guard Summary")
         lines.append("")
-
-
-
-
-
-
-
-
-
-
-
-        spec_sigma = spectral_info.get("sigma_quantile")
-        spec_deadband = spectral_info.get("deadband")
-        spec_max_caps = spectral_info.get("max_caps")
-        summary_yaml = {
-            "sigma_quantile": float(spec_sigma)
-            if isinstance(spec_sigma, int | float)
-            else None,
-            "deadband": float(spec_deadband)
-            if isinstance(spec_deadband, int | float)
-            else None,
-            "max_caps": int(spec_max_caps)
-            if isinstance(spec_max_caps, int | float)
-            else None,
-        }
-        # Drop Nones from summary
-        summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
-        if summary_yaml:
-            lines.append("- **Spectral Summary:**")
-            lines.append(" ```yaml")
-            for line in (
-                yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
-                .strip()
-                .splitlines()
-            ):
-                lines.append(f" {line}")
-            lines.append(" ```")
+        lines.append("| Metric | Value | Status |")
+        lines.append("|--------|-------|--------|")
+
+        spectral_ok = bool(validation.get("spectral_stable", False))
+        caps_applied = spectral_info.get("caps_applied")
+        max_caps = spectral_info.get("max_caps")
+        caps_val = (
+            f"{caps_applied}/{max_caps}"
+            if caps_applied is not None and max_caps is not None
+            else "-"
+        )
         lines.append(
-            f"
+            f"| Caps Applied | {caps_val} | {'✅ OK' if spectral_ok else '❌ FAIL'} |"
         )
+
         summary = spectral_info.get("summary", {}) or {}
-
-
+        caps_exceeded = summary.get("caps_exceeded")
+        if caps_exceeded is not None:
+            cap_status = "✅ OK" if not bool(caps_exceeded) else "⚠️ WARN"
+            lines.append(f"| Caps Exceeded | {caps_exceeded} | {cap_status} |")
+
+        top_scores = spectral_info.get("top_z_scores") or {}
+        max_family: str | None = None
+        max_module: str | None = None
+        max_abs_z: float | None = None
+        if isinstance(top_scores, dict):
+            for family, entries in top_scores.items():
+                if not isinstance(entries, list):
+                    continue
+                for entry in entries:
+                    if not isinstance(entry, dict):
+                        continue
+                    z_val = entry.get("z")
+                    if not (
+                        isinstance(z_val, int | float) and math.isfinite(float(z_val))
+                    ):
+                        continue
+                    z_abs = abs(float(z_val))
+                    if max_abs_z is None or z_abs > max_abs_z:
+                        max_abs_z = z_abs
+                        max_family = str(family)
+                        max_module = (
+                            str(entry.get("module")) if entry.get("module") else None
+                        )
+
         family_caps = spectral_info.get("family_caps") or {}
-
-
-
-
-
-        kappa =
-
-
-
-
-
+        kappa = None
+        if max_family and isinstance(family_caps, dict):
+            try:
+                kappa = (family_caps.get(max_family, {}) or {}).get("kappa")
+            except Exception:
+                kappa = None
+        kappa_f = (
+            float(kappa)
+            if isinstance(kappa, int | float) and math.isfinite(float(kappa))
+            else None
+        )
+
+        if max_abs_z is not None:
+            max_val = f"{max_abs_z:.3f}"
+            if max_family:
+                max_val += f" ({max_family})"
+            if max_module:
+                max_val += f" – {max_module}"
+            if kappa_f is None:
+                max_status = "🛈 No κ"
+            elif max_abs_z <= kappa_f:
+                max_status = f"✅ Within κ={kappa_f:.3f}"
+            else:
+                max_status = f"❌ Exceeds κ={kappa_f:.3f}"
+            lines.append(f"| Max |z| | {max_val} | {max_status} |")
+
+        mt_info = spectral_info.get("multiple_testing", {}) or {}
+        if isinstance(mt_info, dict) and mt_info:
+            mt_method = mt_info.get("method")
+            mt_alpha = mt_info.get("alpha")
+            mt_m = mt_info.get("m")
+            parts: list[str] = []
+            if mt_method:
+                parts.append(f"method={mt_method}")
+            if isinstance(mt_alpha, int | float) and math.isfinite(float(mt_alpha)):
+                parts.append(f"α={float(mt_alpha):.3g}")
+            if isinstance(mt_m, int | float) and math.isfinite(float(mt_m)):
+                parts.append(f"m={int(mt_m)}")
+            lines.append(
+                f"| Multiple Testing | {', '.join(parts) if parts else '—'} | 🛈 INFO |"
+            )
+
+        lines.append("")
+
+        caps_by_family = spectral_info.get("caps_applied_by_family") or {}
         quantiles = spectral_info.get("family_z_quantiles") or {}
-        if
-
-
-
-
-
-
-
+        if any(
+            bool(x)
+            for x in (caps_by_family, quantiles, family_caps, top_scores)
+            if isinstance(x, dict)
+        ):
+            lines.append("<details>")
+            lines.append("<summary>Per-family details</summary>")
+            lines.append("")
+            lines.append("| Family | κ | q95 | Max |z| | Violations |")
+            lines.append("|--------|---|-----|--------|------------|")
+
+            families: set[str] = set()
+            for block in (caps_by_family, quantiles, family_caps, top_scores):
+                if isinstance(block, dict):
+                    families.update(str(k) for k in block.keys())
+
+            for family in sorted(families):
+                kappa = None
+                if isinstance(family_caps, dict):
+                    kappa = (family_caps.get(family, {}) or {}).get("kappa")
+                kappa_str = (
+                    f"{float(kappa):.3f}"
+                    if isinstance(kappa, int | float) and math.isfinite(float(kappa))
+                    else "-"
+                )
+
+                q95 = None
+                max_z = None
+                if isinstance(quantiles, dict):
+                    stats = quantiles.get(family) or {}
+                    if isinstance(stats, dict):
+                        q95 = stats.get("q95")
+                        max_z = stats.get("max")
                 q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
-                q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
                 max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
-
+
+                violations = None
+                if isinstance(caps_by_family, dict):
+                    violations = caps_by_family.get(family)
+                v_str = (
+                    str(int(violations)) if isinstance(violations, int | float) else "0"
+                )
+
                 lines.append(
-                    f"| {family} | {
+                    f"| {family} | {kappa_str} | {q95_str} | {max_str} | {v_str} |"
                 )
+
+            if isinstance(top_scores, dict) and top_scores:
+                lines.append("")
+                lines.append("Top |z| per family:")
+                for family in sorted(top_scores.keys()):
+                    entries = top_scores[family]
+                    if not isinstance(entries, list) or not entries:
+                        continue
+                    formatted_entries = []
+                    for entry in entries:
+                        if not isinstance(entry, dict):
+                            continue
+                        module_name = entry.get("module", "unknown")
+                        z_val = entry.get("z")
+                        if isinstance(z_val, int | float) and math.isfinite(
+                            float(z_val)
+                        ):
+                            z_str = f"{z_val:.3f}"
+                        else:
+                            z_str = "n/a"
+                        formatted_entries.append(f"{module_name} (|z|={z_str})")
+                    lines.append(f"- {family}: {', '.join(formatted_entries)}")
+
             lines.append("")
-
-        if policy_caps:
-            lines.append("- **Family κ (policy):**")
-            lines.append(" ```yaml")
-            caps_yaml = (
-                yaml.safe_dump(policy_caps, sort_keys=True, width=70)
-                .strip()
-                .splitlines()
-            )
-            for line in caps_yaml:
-                lines.append(f" {line}")
-            lines.append(" ```")
-        top_scores = spectral_info.get("top_z_scores") or {}
-        if top_scores:
-            lines.append("Top |z| per family:")
-            for family in sorted(top_scores.keys()):
-                entries = top_scores[family]
-                if not entries:
-                    continue
-                formatted_entries = []
-                for entry in entries:
-                    module_name = entry.get("module", "unknown")
-                    z_val = entry.get("z")
-                    if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
-                        z_str = f"{z_val:.3f}"
-                    else:
-                        z_str = "n/a"
-                    formatted_entries.append(f"{module_name} (|z|={z_str})")
-                lines.append(f"- {family}: {', '.join(formatted_entries)}")
+            lines.append("</details>")
         lines.append("")
 
     rmt_info = certificate.get("rmt", {}) or {}
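The new Max |z| row is a reduction over the nested `top_z_scores` structure ({family: [{module, z}, ...]}, as read by the hunk above). A standalone sketch of that scan under the same shape assumptions:

```python
import math

def max_abs_z(top_scores: dict) -> tuple[float, str, str | None] | None:
    """Largest |z| across all families; returns (|z|, family, module) or None."""
    best: tuple[float, str, str | None] | None = None
    for family, entries in top_scores.items():
        if not isinstance(entries, list):
            continue
        for entry in entries:
            z = entry.get("z") if isinstance(entry, dict) else None
            if not (isinstance(z, (int, float)) and math.isfinite(float(z))):
                continue  # skip missing or non-finite scores
            z_abs = abs(float(z))
            if best is None or z_abs > best[0]:
                best = (z_abs, str(family), entry.get("module"))
    return best

print(max_abs_z({"attn": [{"module": "h.3.attn", "z": -2.7}],
                 "mlp": [{"module": "h.1.mlp", "z": 1.9}]}))
# (2.7, 'attn', 'h.3.attn')
```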
@@ -771,7 +1365,18 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         lines.append("### RMT Guard")
         lines.append("")
         families = rmt_info.get("families") or {}
+        stable = bool(rmt_info.get("stable", True))
+        status = "✅ OK" if stable else "❌ FAIL"
+        delta_total = rmt_info.get("delta_total")
+        if isinstance(delta_total, int):
+            lines.append(f"- Δ total: {delta_total:+d}")
+        lines.append(f"- Status: {status}")
+        lines.append(f"- Families: {len(families)}")
         if families:
+            lines.append("")
+            lines.append("<details>")
+            lines.append("<summary>RMT family details</summary>")
+            lines.append("")
             lines.append("| Family | ε_f | Bare | Guarded | Δ |")
             lines.append("|--------|-----|------|---------|---|")
             for family, data in families.items():
@@ -801,12 +1406,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
                     f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
                 )
             lines.append("")
-
-
-
-            lines.append(
-            lines.append(f"- Stable: {rmt_info.get('stable', True)}")
-            lines.append("")
+            lines.append("</details>")
+            lines.append("")
+        else:
+            lines.append("")
 
     guard_overhead_info = certificate.get("guard_overhead", {}) or {}
     if guard_overhead_info:
@@ -863,21 +1466,21 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         inference_sources = compression_diag.get("inference_source") or {}
         inference_log = compression_diag.get("inference_log") or []
         if inference_flags or inference_sources or inference_log:
-
-
+            appendix_lines.append("### Inference Diagnostics")
+            appendix_lines.append("")
             if inference_flags:
-
+                appendix_lines.append("- **Fields Inferred:**")
                 for field, flag in inference_flags.items():
-
+                    appendix_lines.append(f" - {field}: {'yes' if flag else 'no'}")
             if inference_sources:
-
+                appendix_lines.append("- **Sources:**")
                 for field, source in inference_sources.items():
-
+                    appendix_lines.append(f" - {field}: {source}")
             if inference_log:
-
+                appendix_lines.append("- **Inference Log:**")
                 for entry in inference_log:
-
-
+                    appendix_lines.append(f" - {entry}")
+            appendix_lines.append("")
 
     # Model and Configuration
     lines.append("## Model Information")
@@ -906,28 +1509,48 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     if invarlock_version:
         lines.append(f"- **InvarLock Version:** {invarlock_version}")
     env_flags = meta.get("env_flags")
-    if isinstance(env_flags, dict) and env_flags:
-        lines.append("- **Env Flags:**")
-        lines.append(" ```yaml")
-        for k, v in env_flags.items():
-            lines.append(f" {k}: {v}")
-        lines.append(" ```")
-    # Determinism flags (if present)
     cuda_flags = meta.get("cuda_flags")
+
+    # Compressed determinism/environment summary for readability
+    det_parts: list[str] = []
+    for label, keys in (
+        ("torch_det", ("torch_deterministic_algorithms", "deterministic_algorithms")),
+        ("cudnn_det", ("cudnn_deterministic",)),
+        ("cudnn_bench", ("cudnn_benchmark",)),
+        ("tf32_matmul", ("cuda_matmul_allow_tf32",)),
+        ("tf32_cudnn", ("cudnn_allow_tf32",)),
+        ("cublas_ws", ("CUBLAS_WORKSPACE_CONFIG",)),
+    ):
+        val = None
+        for key in keys:
+            if isinstance(env_flags, dict) and env_flags.get(key) is not None:
+                val = env_flags.get(key)
+                break
+            if isinstance(cuda_flags, dict) and cuda_flags.get(key) is not None:
+                val = cuda_flags.get(key)
+                break
+        if val is not None:
+            det_parts.append(f"{label}={val}")
+    if det_parts:
+        lines.append(f"- **Determinism:** {', '.join(det_parts)}")
+
+    full_flags: dict[str, Any] = {}
+    if isinstance(env_flags, dict) and env_flags:
+        full_flags["env_flags"] = env_flags
     if isinstance(cuda_flags, dict) and cuda_flags:
-
-
-
-
-
-
-
-
-        ):
-
-
-
+        full_flags["cuda_flags"] = cuda_flags
+    if full_flags:
+        lines.append("")
+        lines.append("<details>")
+        lines.append("<summary>Environment flags (full)</summary>")
+        lines.append("")
+        lines.append("```yaml")
+        flags_yaml = yaml.safe_dump(full_flags, sort_keys=True, width=80).strip()
+        for line in flags_yaml.splitlines():
+            lines.append(line)
+        lines.append("```")
+        lines.append("")
+        lines.append("</details>")
     lines.append("")
 
     # Edit Configuration (removed duplicate Edit Information section)
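The determinism summary added here collapses scattered env/CUDA flags into a single first-match line. A minimal sketch of the same lookup (flag names are taken from the hunk; the helper itself is illustrative, not a package API):

```python
from typing import Any

# Label -> candidate keys; the first key present wins (mirrors the hunk's table).
_DET_KEYS = (
    ("torch_det", ("torch_deterministic_algorithms", "deterministic_algorithms")),
    ("cudnn_det", ("cudnn_deterministic",)),
    ("cublas_ws", ("CUBLAS_WORKSPACE_CONFIG",)),
)

def determinism_summary(env_flags: dict[str, Any], cuda_flags: dict[str, Any]) -> str:
    parts = []
    for label, keys in _DET_KEYS:
        for key in keys:
            val = env_flags.get(key, cuda_flags.get(key))
            if val is not None:
                parts.append(f"{label}={val}")
                break
    return ", ".join(parts)

print(determinism_summary(
    {"torch_deterministic_algorithms": True},
    {"cudnn_deterministic": True, "CUBLAS_WORKSPACE_CONFIG": ":4096:8"},
))  # torch_det=True, cudnn_det=True, cublas_ws=:4096:8
```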
@@ -951,267 +1574,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
951
1574
|
pass
|
|
952
1575
|
lines.append("")
|
|
953
1576
|
|
|
954
|
-
|
|
955
|
-
if resolved_policy:
|
|
956
|
-
lines.append("## Resolved Policy")
|
|
957
|
-
lines.append("")
|
|
958
|
-
lines.append("```yaml")
|
|
959
|
-
resolved_yaml = yaml.safe_dump(
|
|
960
|
-
resolved_policy, sort_keys=True, width=80, default_flow_style=False
|
|
961
|
-
).strip()
|
|
962
|
-
for line in resolved_yaml.splitlines():
|
|
963
|
-
lines.append(line)
|
|
964
|
-
lines.append("```")
|
|
965
|
-
lines.append("")
|
|
966
|
-
|
|
967
|
-
policy_provenance = certificate.get("policy_provenance", {})
|
|
968
|
-
if policy_provenance:
|
|
969
|
-
lines.append("## Policy Provenance")
|
|
970
|
-
lines.append("")
|
|
971
|
-
lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
|
|
972
|
-
overrides_list = policy_provenance.get("overrides") or []
|
|
973
|
-
if overrides_list:
|
|
974
|
-
lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
|
|
975
|
-
else:
|
|
976
|
-
lines.append("- **Overrides:** (none)")
|
|
977
|
-
digest_value = policy_provenance.get("policy_digest")
|
|
978
|
-
if digest_value:
|
|
979
|
-
lines.append(f"- **Policy Digest:** `{digest_value}`")
|
|
980
|
-
else:
|
|
981
|
-
lines.append("- **Policy Digest:** (not recorded)")
|
|
982
|
-
if policy_provenance.get("resolved_at"):
|
|
983
|
-
lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
|
|
984
|
-
lines.append("")
|
|
985
|
-
|
|
986
|
-
# Dataset Information
|
|
987
|
-
lines.append("## Dataset Configuration")
|
|
988
|
-
lines.append("")
|
|
989
|
-
dataset = certificate.get("dataset", {}) or {}
|
|
990
|
-
prov = (
|
|
991
|
-
(dataset.get("provider") or "unknown")
|
|
992
|
-
if isinstance(dataset, dict)
|
|
993
|
-
else "unknown"
|
|
994
|
-
)
|
|
995
|
-
lines.append(f"- **Provider:** {prov}")
|
|
996
|
-
try:
|
|
997
|
-
seq_len_val = (
|
|
998
|
-
int(dataset.get("seq_len"))
|
|
999
|
-
if isinstance(dataset.get("seq_len"), int | float)
|
|
1000
|
-
else dataset.get("seq_len")
|
|
1001
|
-
)
|
|
1002
|
-
except Exception: # pragma: no cover - defensive
|
|
1003
|
-
seq_len_val = dataset.get("seq_len")
|
|
1004
|
-
if seq_len_val is not None:
|
|
1005
|
-
lines.append(f"- **Sequence Length:** {seq_len_val}")
|
|
1006
|
-
windows_blk = (
|
|
1007
|
-
dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
|
|
1008
|
-
)
|
|
1009
|
-
win_prev = windows_blk.get("preview")
|
|
1010
|
-
win_final = windows_blk.get("final")
|
|
1011
|
-
if win_prev is not None and win_final is not None:
|
|
1012
|
-
lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
|
|
1013
|
-
if windows_blk.get("seed") is not None:
|
|
1014
|
-
lines.append(f"- **Seed:** {windows_blk.get('seed')}")
|
|
1015
|
-
hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
|
|
1016
|
-
if hash_blk.get("preview_tokens") is not None:
|
|
1017
|
-
lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
|
|
1018
|
-
if hash_blk.get("final_tokens") is not None:
|
|
1019
|
-
lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
|
|
1020
|
-
if hash_blk.get("total_tokens") is not None:
|
|
1021
|
-
lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
|
|
1022
|
-
if hash_blk.get("dataset"):
|
|
1023
|
-
lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
|
|
1024
|
-
tokenizer = dataset.get("tokenizer", {})
|
|
1025
|
-
if tokenizer.get("name") or tokenizer.get("hash"):
|
|
1026
|
-
vocab_size = tokenizer.get("vocab_size")
|
|
1027
|
-
vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
|
|
1028
|
-
lines.append(
|
|
1029
|
-
f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
|
|
1030
|
-
)
|
|
1031
|
-
if tokenizer.get("hash"):
|
|
1032
|
-
lines.append(f" - Hash: {tokenizer['hash']}")
|
|
1033
|
-
lines.append(
|
|
1034
|
-
f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
|
|
1035
|
-
)
|
|
1036
|
-
if tokenizer.get("pad_token") is not None:
|
|
1037
|
-
lines.append(f" - PAD: {tokenizer.get('pad_token')}")
|
|
1038
|
-
if tokenizer.get("add_prefix_space") is not None:
|
|
1039
|
-
lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
|
|
1040
|
-
lines.append("")
|
|
1041
|
-
|
|
1042
|
-
provenance_info = certificate.get("provenance", {}) or {}
|
|
1043
|
-
if provenance_info:
|
|
1044
|
-
lines.append("## Run Provenance")
|
|
1045
|
-
lines.append("")
|
|
1046
|
-
baseline_info = provenance_info.get("baseline", {}) or {}
|
|
1047
|
-
if baseline_info:
|
|
1048
|
-
lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
|
|
1049
|
-
if baseline_info.get("report_hash"):
|
|
1050
|
-
lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
|
|
1051
|
-
if baseline_info.get("report_path"):
|
|
1052
|
-
lines.append(f" - Report Path: {baseline_info.get('report_path')}")
|
|
1053
|
-
edited_info = provenance_info.get("edited", {}) or {}
|
|
1054
|
-
if edited_info:
|
|
1055
|
-
lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
|
|
1056
|
-
if edited_info.get("report_hash"):
|
|
1057
|
-
lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
|
|
1058
|
-
if edited_info.get("report_path"):
|
|
1059
|
-
lines.append(f" - Report Path: {edited_info.get('report_path')}")
|
|
1060
|
-
window_plan = provenance_info.get("window_plan")
|
|
1061
|
-
if isinstance(window_plan, dict) and window_plan:
|
|
1062
|
-
preview_val = window_plan.get(
|
|
1063
|
-
"preview_n", window_plan.get("actual_preview")
|
|
1064
|
-
)
|
|
1065
|
-
final_val = window_plan.get("final_n", window_plan.get("actual_final"))
|
|
1066
|
-
lines.append(
|
|
1067
|
-
f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
|
|
1068
|
-
)
|
|
1069
|
-
provider_digest = provenance_info.get("provider_digest")
|
|
1070
|
-
if isinstance(provider_digest, dict) and provider_digest:
|
|
1071
|
-
ids_d = provider_digest.get("ids_sha256")
|
|
1072
|
-
tok_d = provider_digest.get("tokenizer_sha256")
|
|
1073
|
-
mask_d = provider_digest.get("masking_sha256")
|
|
1074
|
-
|
|
1075
|
-
lines.append("- **Provider Digest:**")
|
|
1076
|
-
if tok_d:
|
|
1077
|
-
lines.append(
|
|
1078
|
-
f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
|
|
1079
|
-
)
|
|
1080
|
-
if ids_d:
|
|
1081
|
-
lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
|
|
1082
|
-
if mask_d:
|
|
1083
|
-
lines.append(
|
|
1084
|
-
f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
|
|
1085
|
-
)
|
|
1086
|
-
# Surface confidence label prominently
|
|
1087
|
-
try:
|
|
1088
|
-
conf = certificate.get("confidence", {}) or {}
|
|
1089
|
-
if isinstance(conf, dict) and conf.get("label"):
|
|
1090
|
-
lines.append(f"- **Confidence:** {conf.get('label')}")
|
|
1091
|
-
except Exception:
|
|
1092
|
-
pass
|
|
1093
|
-
# Surface policy version + thresholds hash (short)
|
|
1094
|
-
try:
|
|
1095
|
-
pd = certificate.get("policy_digest", {}) or {}
|
|
1096
|
-
if isinstance(pd, dict) and pd:
|
|
1097
|
-
pv = pd.get("policy_version")
|
|
1098
|
-
th = pd.get("thresholds_hash")
|
|
1099
|
-
if pv:
|
|
1100
|
-
lines.append(f"- **Policy Version:** {pv}")
|
|
1101
|
-
if isinstance(th, str) and th:
|
|
1102
|
-
short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
|
|
1103
|
-
lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
|
|
1104
|
-
if pd.get("changed"):
|
|
1105
|
-
lines.append("- Note: policy changed")
|
|
1106
|
-
except Exception:
|
|
1107
|
-
pass
|
|
1108
|
-
lines.append("")
|
|
1577
|
+
_append_dataset_and_provenance_section(lines, certificate)
|
|
1109
1578
|
|
|
1110
1579
|
# Structural Changes heading is printed with content later; avoid empty header here
|
|
1111
1580
|
|
|
1112
|
-
# Primary Metric (metric-v1) snapshot, if present
|
|
1113
|
-
try:
|
|
1114
|
-
pm = certificate.get("primary_metric")
|
|
1115
|
-
if isinstance(pm, dict) and pm:
|
|
1116
|
-
kind = pm.get("kind", "unknown")
|
|
1117
|
-
lines.append(f"## Primary Metric ({kind})")
|
|
1118
|
-
lines.append("")
|
|
1119
|
-
unit = pm.get("unit", "-")
|
|
1120
|
-
paired = pm.get("paired", False)
|
|
1121
|
-
reps = None
|
|
1122
|
-
# Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
|
|
1123
|
-
# Mark estimated metrics (e.g., pseudo accuracy counts) clearly
|
|
1124
|
-
estimated_flag = False
|
|
1125
|
-
try:
|
|
1126
|
-
if bool(pm.get("estimated")):
|
|
1127
|
-
estimated_flag = True
|
|
1128
|
-
elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
|
|
1129
|
-
estimated_flag = True
|
|
1130
|
-
except Exception:
|
|
1131
|
-
estimated_flag = False
|
|
1132
|
-
est_suffix = " (estimated)" if estimated_flag else ""
|
|
1133
|
-
lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
|
|
1134
|
-
gating_basis = pm.get("gating_basis") or pm.get("basis")
|
|
1135
|
-
if gating_basis:
|
|
1136
|
-
lines.append(f"- Basis: {gating_basis}")
|
|
1137
|
-
if isinstance(paired, bool):
|
|
1138
|
-
lines.append(f"- Paired: {paired}")
|
|
1139
|
-
reps = pm.get("reps")
|
|
1140
|
-
if isinstance(reps, int | float):
|
|
1141
|
-
lines.append(f"- Bootstrap Reps: {int(reps)}")
|
|
1142
|
-
ci = pm.get("ci") or pm.get("display_ci")
|
|
1143
|
-
if (
|
|
1144
|
-
isinstance(ci, list | tuple)
|
|
1145
|
-
and len(ci) == 2
|
|
1146
|
-
and all(isinstance(x, int | float) for x in ci)
|
|
1147
|
-
):
|
|
1148
|
-
lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
|
|
1149
|
-
prev = pm.get("preview")
|
|
1150
|
-
fin = pm.get("final")
|
|
1151
|
-
ratio = pm.get("ratio_vs_baseline")
|
|
1152
|
-
|
|
1153
|
-
lines.append("")
|
|
1154
|
-
if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
|
|
1155
|
-
lines.append(
|
|
1156
|
-
"- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
|
|
1157
|
-
)
|
|
1158
|
-
lines.append("| Field | Value |")
|
|
1159
|
-
lines.append("|-------|-------|")
|
|
1160
|
-
lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
|
|
1161
|
-
lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
|
|
1162
|
-
# For accuracy, ratio field is actually a delta (as per helper); clarify inline
|
|
1163
|
-
if kind in {"accuracy", "vqa_accuracy"}:
|
|
1164
|
-
lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
|
|
1165
|
-
# When baseline accuracy is near-zero, clarify display rule
|
|
1166
|
-
try:
|
|
1167
|
-
base_pt = pm.get("baseline_point")
|
|
1168
|
-
if isinstance(base_pt, int | float) and base_pt < 0.05:
|
|
1169
|
-
lines.append(
|
|
1170
|
-
"- Note: baseline < 5%; ratio suppressed; showing Δpp"
|
|
1171
|
-
)
|
|
1172
|
-
except Exception:
|
|
1173
|
-
pass
|
|
1174
|
-
else:
|
|
1175
|
-
try:
|
|
1176
|
-
lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
|
|
1177
|
-
except Exception:
|
|
1178
|
-
lines.append("| Ratio vs Baseline | N/A |")
|
|
1179
|
-
lines.append("")
|
|
1180
|
-
# Secondary metrics (informational)
|
|
1181
|
-
try:
|
|
1182
|
-
secs = certificate.get("secondary_metrics")
|
|
1183
|
-
if isinstance(secs, list) and secs:
|
|
1184
|
-
lines.append("## Secondary Metrics (informational)")
|
|
1185
|
-
lines.append("")
|
|
1186
|
-
lines.append("| Kind | Preview | Final | vs Baseline | CI |")
|
|
1187
|
-
lines.append("|------|---------|-------|-------------|----|")
|
|
1188
|
-
for m in secs:
|
|
1189
|
-
if not isinstance(m, dict):
|
|
1190
|
-
continue
|
|
1191
|
-
k = m.get("kind", "?")
|
|
1192
|
-
pv = _fmt_by_kind(m.get("preview"), str(k))
|
|
1193
|
-
fv = _fmt_by_kind(m.get("final"), str(k))
|
|
1194
|
-
rb = m.get("ratio_vs_baseline")
|
|
1195
|
-
try:
|
|
1196
|
-
rb_str = (
|
|
1197
|
-
f"{float(rb):.3f}"
|
|
1198
|
-
if (str(k).startswith("ppl"))
|
|
1199
|
-
else _fmt_by_kind(rb, str(k))
|
|
1200
|
-
)
|
|
1201
|
-
except Exception:
|
|
1202
|
-
rb_str = "N/A"
|
|
1203
|
-
ci = m.get("display_ci") or m.get("ci")
|
|
1204
|
-
if isinstance(ci, tuple | list) and len(ci) == 2:
|
|
1205
|
-
ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
|
|
1206
|
-
else:
|
|
1207
|
-
ci_str = "–"
|
|
1208
|
-
lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
|
|
1209
|
-
lines.append("")
|
|
1210
|
-
except Exception:
|
|
1211
|
-
pass
|
|
1212
|
-
except Exception:
|
|
1213
|
-
pass
|
|
1214
|
-
|
|
1215
1581
|
# System Overhead section (latency/throughput)
|
|
1216
1582
|
sys_over = certificate.get("system_overhead", {}) or {}
|
|
1217
1583
|
if isinstance(sys_over, dict) and sys_over:
|
|
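With 0.3.7, the inline Primary Metric and Secondary Metrics rendering removed above moves into dedicated section helpers such as `_append_dataset_and_provenance_section`. As a minimal sketch of what the deleted block produced, the following self-contained example rebuilds the Preview/Final/Ratio table; `fmt` is a hypothetical stand-in for the package-private `_fmt_by_kind`, and only the `primary_metric` keys (`kind`, `preview`, `final`, `ratio_vs_baseline`) are taken from the deleted code:

```python
from typing import Any


def fmt(value: Any, kind: str) -> str:
    # Hypothetical formatter standing in for invarlock's private _fmt_by_kind:
    # percentages for accuracy-like kinds, three decimals otherwise.
    if not isinstance(value, (int, float)):
        return "N/A"
    if kind in {"accuracy", "vqa_accuracy"}:
        return f"{value * 100:.1f}%"
    return f"{value:.3f}"


def primary_metric_table(pm: dict[str, Any]) -> list[str]:
    # Mirrors the removed inline rendering: a two-column markdown table.
    kind = str(pm.get("kind", "unknown"))
    rows = ["| Field | Value |", "|-------|-------|"]
    rows.append(f"| Preview | {fmt(pm.get('preview'), kind)} |")
    rows.append(f"| Final | {fmt(pm.get('final'), kind)} |")
    ratio = pm.get("ratio_vs_baseline")
    if kind in {"accuracy", "vqa_accuracy"}:
        # For accuracy kinds the "ratio" field actually carries a delta.
        rows.append(f"| Δ vs Baseline | {fmt(ratio, kind)} |")
    else:
        rows.append(
            f"| Ratio vs Baseline | {float(ratio):.3f} |"
            if isinstance(ratio, (int, float))
            else "| Ratio vs Baseline | N/A |"
        )
    return rows


print("\n".join(primary_metric_table(
    {"kind": "ppl", "preview": 12.3, "final": 12.5, "ratio_vs_baseline": 1.016}
)))
```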
@@ -1370,31 +1736,32 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
 
     # Variance Guard (Spectral/RMT summaries are already provided above)
     variance = certificate["variance"]
-    lines.append(…)
+    appendix_lines.append("### Variance Guard")
+    appendix_lines.append("")
 
     # Display whether VE was enabled after A/B test
-    lines.append(…)
+    appendix_lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
 
     if variance["enabled"]:
         # VE was enabled - show the gain
         gain_value = variance.get("gain", "N/A")
         if isinstance(gain_value, int | float):
-            lines.append(…)
+            appendix_lines.append(f"- **Gain:** {gain_value:.3f}")
         else:
-            lines.append(…)
+            appendix_lines.append(f"- **Gain:** {gain_value}")
     else:
         # VE was not enabled - show succinct reason if available, else a clear disabled message
         ppl_no_ve = variance.get("ppl_no_ve")
         ppl_with_ve = variance.get("ppl_with_ve")
         ratio_ci = variance.get("ratio_ci")
         if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
-            lines.append(…)
-            lines.append(…)
+            appendix_lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
+            appendix_lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
             gain_value = variance.get("gain")
             if isinstance(gain_value, int | float):
-                lines.append(…)
+                appendix_lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
             else:
-                lines.append(
+                appendix_lines.append(
                     "- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
                 )
         # Add concise rationale aligned with Balanced predictive gate contract
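The `lines` → `appendix_lines` rename running through this hunk is a buffer-then-emit refactor: appendix material is collected in its own list and only attached, under a single `## Appendix` heading, when something was actually written (the `if appendix_lines:` block appears in a later hunk). A minimal sketch of the pattern, assuming a simplified certificate shape:

```python
def render(certificate: dict) -> str:
    lines: list[str] = ["# Certificate"]
    appendix_lines: list[str] = []

    # Detail sections are buffered instead of written straight to `lines`.
    variance = certificate.get("variance") or {}
    if variance:
        appendix_lines.append("### Variance Guard")
        appendix_lines.append("")
        appendix_lines.append(
            f"- **Enabled:** {'Yes' if variance.get('enabled') else 'No'}"
        )

    # The appendix heading is emitted only when the buffer is non-empty,
    # so certificates without detail sections get no empty "## Appendix".
    if appendix_lines:
        lines.append("## Appendix")
        lines.append("")
        lines.extend(appendix_lines)
    return "\n".join(lines)


print(render({"variance": {"enabled": False}}))
```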
@@ -1402,14 +1769,14 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         ve_policy = certificate.get("policies", {}).get("variance", {})
         min_effect = ve_policy.get("min_effect_lognll")
         if isinstance(min_effect, int | float):
-            lines.append(
+            appendix_lines.append(
                 f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
             )
         else:
-            lines.append(
+            appendix_lines.append(
                 "- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
             )
-        lines.append(
+        appendix_lines.append(
             "- Predictive Gate: evaluated=false (disabled under current policy/edit)."
         )
     except Exception:
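The rationale strings in this hunk spell out the Balanced predictive-gate contract: variance equalization is enabled only when the one-sided confidence interval on the mean log-NLL delta excludes 0 and the absolute mean effect reaches `min_effect_lognll`. A hedged sketch of that decision rule follows; the actual gate is implemented elsewhere in the package, so this merely restates the contract from the rendered text:

```python
def predictive_gate_enables(
    mean_delta: float,
    ci_low: float,
    ci_high: float,
    min_effect: float,
) -> bool:
    # Balanced predictive gate as described by the certificate text:
    # the CI must exclude 0 and |mean Δ| must reach min_effect.
    ci_excludes_zero = ci_low > 0.0 or ci_high < 0.0
    return ci_excludes_zero and abs(mean_delta) >= min_effect


# A small but clearly resolved effect passes the gate.
print(predictive_gate_enables(
    mean_delta=-0.012, ci_low=-0.020, ci_high=-0.004, min_effect=0.01
))  # True
```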
@@ -1417,14 +1784,17 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
 
     if variance.get("ratio_ci"):
         ratio_lo, ratio_hi = variance["ratio_ci"]
-        lines.append(…)
+        appendix_lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
 
     if variance.get("calibration") and variance.get("enabled"):
         calib = variance["calibration"]
         coverage = calib.get("coverage")
         requested = calib.get("requested")
         status = calib.get("status", "unknown")
-        lines.append(…)
+        appendix_lines.append(
+            f"- **Calibration:** {coverage}/{requested} windows ({status})"
+        )
+        appendix_lines.append("")
 
     lines.append("")
 
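Assuming the calibration payload shape implied by the `.get()` calls above (`coverage`, `requested`, and `status` keys), the bullet this hunk renders for a partially covered run would read:

```python
calib = {"coverage": 14, "requested": 16, "status": "partial"}  # assumed shape
line = (
    f"- **Calibration:** {calib.get('coverage')}/{calib.get('requested')}"
    f" windows ({calib.get('status', 'unknown')})"
)
print(line)  # - **Calibration:** 14/16 windows (partial)
```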
@@ -1458,32 +1828,22 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
         lines.append("")
 
-
-    lines.append("## Applied Policies")
-    lines.append("")
-    policies = certificate["policies"]
-    for guard_name, policy in policies.items():
-        lines.append(f"### {guard_name.title()}")
-        lines.append("")
-        policy_yaml = (
-            yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
-        )
-        lines.append("```yaml")
-        for line in policy_yaml:
-            lines.append(line)
-        lines.append("```")
-        lines.append("")
+    _append_policy_configuration_section(lines, certificate)
 
-    lines.append(…)
-    lines.append(…)
-    lines.append("")
+    appendix_lines.append("### Artifacts")
+    appendix_lines.append("")
     artifacts = certificate["artifacts"]
     if artifacts.get("events_path"):
-        lines.append(…)
+        appendix_lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
     if artifacts.get("report_path"):
-        lines.append(…)
-        lines.append(…)
-        lines.append(…)
+        appendix_lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
+        appendix_lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
+        appendix_lines.append("")
+
+    if appendix_lines:
+        lines.append("## Appendix")
+        lines.append("")
+        lines.extend(appendix_lines)
 
     # Certificate Hash for Integrity
     cert_hash = _compute_certificate_hash(certificate)
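The context line above calls `_compute_certificate_hash`, whose body is not part of this diff. A common way to fingerprint a document like this, and only an assumption about what the helper actually does, is to hash a canonical JSON serialization:

```python
import hashlib
import json
from typing import Any


def compute_certificate_hash(certificate: dict[str, Any]) -> str:
    # Hypothetical stand-in for _compute_certificate_hash: serialize the
    # certificate to canonical JSON (sorted keys, compact separators) and
    # hash it with SHA-256. Assumes the certificate is JSON-serializable.
    canonical = json.dumps(certificate, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


print(compute_certificate_hash({"variance": {"enabled": True}})[:16])
```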
@@ -1494,7 +1854,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     lines.append("---")
     lines.append("")
     lines.append(
-        "*This InvarLock …
+        "*This InvarLock evaluation certificate provides a comprehensive assessment of model compression safety.*"
     )
     lines.append(
         "*All metrics are compared against the uncompressed baseline model for safety validation.*"