invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +2 -2
- invarlock/_data/runtime/tiers.yaml +57 -30
- invarlock/adapters/__init__.py +11 -15
- invarlock/adapters/auto.py +35 -40
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_mixin.py +25 -4
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/calibration/spectral_null.py +15 -10
- invarlock/calibration/variance_ve.py +0 -2
- invarlock/cli/adapter_auto.py +31 -21
- invarlock/cli/app.py +73 -2
- invarlock/cli/commands/calibrate.py +6 -2
- invarlock/cli/commands/certify.py +651 -91
- invarlock/cli/commands/doctor.py +11 -11
- invarlock/cli/commands/explain_gates.py +57 -8
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +233 -69
- invarlock/cli/commands/run.py +1066 -244
- invarlock/cli/commands/verify.py +154 -15
- invarlock/cli/config.py +22 -6
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +1 -1
- invarlock/core/api.py +45 -5
- invarlock/core/auto_tuning.py +65 -20
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/contracts.py +7 -1
- invarlock/core/registry.py +11 -13
- invarlock/core/runner.py +425 -75
- invarlock/edits/quant_rtn.py +65 -37
- invarlock/eval/bench.py +3 -16
- invarlock/eval/data.py +82 -51
- invarlock/eval/metrics.py +63 -2
- invarlock/eval/primary_metric.py +23 -0
- invarlock/eval/tail_stats.py +230 -0
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/_estimators.py +154 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/policies.py +16 -6
- invarlock/guards/rmt.py +627 -546
- invarlock/guards/spectral.py +348 -110
- invarlock/guards/tier_config.py +32 -30
- invarlock/guards/variance.py +7 -31
- invarlock/guards_ref/rmt_ref.py +23 -23
- invarlock/model_profile.py +90 -42
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/reporting/certificate.py +384 -55
- invarlock/reporting/certificate_schema.py +3 -2
- invarlock/reporting/dataset_hashing.py +15 -2
- invarlock/reporting/guards_analysis.py +350 -277
- invarlock/reporting/html.py +55 -5
- invarlock/reporting/normalizer.py +13 -0
- invarlock/reporting/policy_utils.py +38 -36
- invarlock/reporting/primary_metric_utils.py +71 -17
- invarlock/reporting/render.py +852 -431
- invarlock/reporting/report.py +40 -4
- invarlock/reporting/report_types.py +11 -3
- invarlock/reporting/telemetry.py +86 -0
- invarlock/reporting/validate.py +1 -18
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
invarlock/reporting/render.py
CHANGED
|
@@ -113,6 +113,462 @@ def _short_digest(v: str) -> str:
|
|
|
113
113
|
return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
|
|
114
114
|
|
|
115
115
|
|
|
116
|
+
def _render_executive_dashboard(cert: dict[str, Any]) -> str:
    """Return the safety dashboard for ``cert`` as one Markdown string.

    The rows are produced by ``_append_safety_dashboard_section``; they are
    joined with newlines and trailing whitespace is stripped.
    """
    buffer: list[str] = []
    _append_safety_dashboard_section(buffer, cert)
    rendered = "\n".join(buffer)
    return rendered.rstrip()
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _append_safety_dashboard_section(
    lines: list[str], certificate: dict[str, Any]
) -> None:
    """Append a concise, first-screen dashboard for the certificate.

    Emits a ``## Safety Dashboard`` Markdown table with one row each for the
    overall verdict, the primary metric, preview/final drift, and the
    invariants, spectral and RMT gates, plus an overhead row when
    ``guard_overhead.evaluated`` is truthy. A blank line closes the section.

    Args:
        lines: Output buffer; extended in place.
        certificate: Certificate mapping. The ``validation``,
            ``primary_metric``, ``auto`` and ``guard_overhead`` entries are
            read; entries that are missing or not dicts are treated as empty.
    """

    def _as_dict(value: Any) -> dict[str, Any]:
        # A malformed (non-dict) section is rendered as absent, not a crash.
        return value if isinstance(value, dict) else {}

    def _finite(value: Any) -> float | None:
        # Only real, finite numbers are formatted; NaN/inf fall back to "N/A".
        if not isinstance(value, int | float):
            return None
        try:
            number = float(value)
        except OverflowError:  # int too large to represent as a float
            return None
        return number if math.isfinite(number) else None

    def _status(ok: bool | None, text: str) -> str:
        # ``None`` means the gate was not recorded: informational marker.
        if ok is None:
            return f"🛈 {text}"
        return f"{'✅' if ok else '❌'} {text}"

    block = compute_console_validation_block(certificate)
    overall_pass = bool(block.get("overall_pass"))
    overall_status = _status(overall_pass, "PASS" if overall_pass else "FAIL")

    validation = certificate.get("validation", {}) or {}
    validation_is_dict = isinstance(validation, dict)
    gates = validation if validation_is_dict else {}
    pm = _as_dict(certificate.get("primary_metric"))
    auto = _as_dict(certificate.get("auto"))
    tier = str(auto.get("tier") or "balanced").lower()

    def _gate_flag(key: str) -> bool | None:
        return bool(gates[key]) if key in gates else None

    def _gate_cell(key: str) -> str:
        ok = _gate_flag(key)
        if ok is None:
            return "🛈 N/A"
        return "✅ PASS" if ok else "❌ FAIL"

    # Primary metric summary
    pm_kind = str(pm.get("kind", "")).lower()
    pm_basis = pm.get("gating_basis") or pm.get("basis") or "point"
    pm_value = _finite(pm.get("ratio_vs_baseline"))

    if pm_kind in {"accuracy", "vqa_accuracy"}:
        # Accuracy kinds are reported as a percentage-point delta.
        measured = f"{pm_value:+.2f} pp" if pm_value is not None else "N/A"
        pp_floor = {
            "conservative": -0.5,
            "balanced": -1.0,
            "aggressive": -2.0,
            "none": -1.0,
        }.get(tier, -1.0)
        threshold = f"≥ {pp_floor:+.2f} pp ({pm_basis})"
    else:
        measured = f"{pm_value:.3f}×" if pm_value is not None else "N/A"
        ratio_limit = {
            "conservative": 1.05,
            "balanced": 1.10,
            "aggressive": 1.20,
            "none": 1.10,
        }.get(tier, 1.10)
        # An explicit positive target can only tighten the tier limit.
        target_ratio = _finite(auto.get("target_pm_ratio"))
        if target_ratio is not None and target_ratio > 0:
            ratio_limit = min(ratio_limit, target_ratio)
        threshold = f"≤ {ratio_limit:.2f}× ({pm_basis})"

    pm_status = _status(_gate_flag("primary_metric_acceptable"), measured)

    # Drift summary (final/preview ratio) when preview/final are numeric
    preview = _finite(pm.get("preview"))
    final = _finite(pm.get("final"))
    drift_val = "N/A"
    if preview is not None and final is not None and preview > 0:
        drift = final / preview
        if math.isfinite(drift):
            drift_val = f"{drift:.3f}×"
    drift_status = _status(_gate_flag("preview_final_drift_acceptable"), drift_val)

    overhead_ctx = _as_dict(certificate.get("guard_overhead"))
    overhead_row: tuple[str, str, str] | None = None
    if bool(overhead_ctx.get("evaluated")):
        overhead_pct = _finite(overhead_ctx.get("overhead_percent"))
        overhead_ratio = _finite(overhead_ctx.get("overhead_ratio"))
        if overhead_pct is not None:
            overhead_measured = f"{overhead_pct:+.2f}%"
        elif overhead_ratio is not None:
            overhead_measured = f"{overhead_ratio:.3f}×"
        else:
            overhead_measured = "N/A"
        threshold_pct = _finite(overhead_ctx.get("threshold_percent"))
        # ``:+`` renders the sign itself, so a negative limit is not "+-x%".
        threshold_str = (
            f"≤ {threshold_pct:+.1f}%" if threshold_pct is not None else "≤ +1.0%"
        )
        # A dict without the gate key counts as acceptable; a non-dict
        # ``validation`` yields the informational marker instead.
        overhead_ok = (
            bool(validation.get("guard_overhead_acceptable", True))
            if validation_is_dict
            else None
        )
        overhead_row = (
            "Overhead",
            _status(overhead_ok, overhead_measured),
            threshold_str,
        )

    lines.append("## Safety Dashboard")
    lines.append("")
    lines.append("| Check | Status | Quick Summary |")
    lines.append("|-------|--------|---------------|")
    lines.append(f"| Overall | {overall_status} | Canonical gate outcomes |")
    lines.append(f"| Primary Metric | {pm_status} | {threshold} |")
    lines.append(f"| Drift | {drift_status} | 0.95–1.05× band |")
    lines.append(
        f"| Invariants | {_gate_cell('invariants_pass')} | Model integrity checks |"
    )
    lines.append(
        f"| Spectral | {_gate_cell('spectral_stable')} | Weight matrix spectral norms |"
    )
    lines.append(f"| RMT | {_gate_cell('rmt_stable')} | Random Matrix Theory guard |")
    if overhead_row:
        lines.append(f"| {overhead_row[0]} | {overhead_row[1]} | {overhead_row[2]} |")
    lines.append("")
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def _append_primary_metric_section(
|
|
274
|
+
lines: list[str], certificate: dict[str, Any]
|
|
275
|
+
) -> None:
|
|
276
|
+
"""Append the Primary Metric section early for quick triage."""
|
|
277
|
+
pm = certificate.get("primary_metric")
|
|
278
|
+
if not isinstance(pm, dict) or not pm:
|
|
279
|
+
return
|
|
280
|
+
|
|
281
|
+
kind = pm.get("kind", "unknown")
|
|
282
|
+
lines.append("## Primary Metric")
|
|
283
|
+
lines.append("")
|
|
284
|
+
unit = pm.get("unit", "-")
|
|
285
|
+
paired = pm.get("paired", False)
|
|
286
|
+
|
|
287
|
+
estimated_flag = False
|
|
288
|
+
try:
|
|
289
|
+
if bool(pm.get("estimated")):
|
|
290
|
+
estimated_flag = True
|
|
291
|
+
elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
|
|
292
|
+
estimated_flag = True
|
|
293
|
+
except Exception:
|
|
294
|
+
estimated_flag = False
|
|
295
|
+
est_suffix = " (estimated)" if estimated_flag else ""
|
|
296
|
+
|
|
297
|
+
lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
|
|
298
|
+
gating_basis = pm.get("gating_basis") or pm.get("basis")
|
|
299
|
+
if gating_basis:
|
|
300
|
+
lines.append(f"- Basis: {gating_basis}")
|
|
301
|
+
if isinstance(paired, bool):
|
|
302
|
+
lines.append(f"- Paired: {paired}")
|
|
303
|
+
reps = pm.get("reps")
|
|
304
|
+
if isinstance(reps, int | float):
|
|
305
|
+
lines.append(f"- Bootstrap Reps: {int(reps)}")
|
|
306
|
+
ci = pm.get("ci") or pm.get("display_ci")
|
|
307
|
+
if (
|
|
308
|
+
isinstance(ci, list | tuple)
|
|
309
|
+
and len(ci) == 2
|
|
310
|
+
and all(isinstance(x, int | float) for x in ci)
|
|
311
|
+
):
|
|
312
|
+
lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
|
|
313
|
+
|
|
314
|
+
prev = pm.get("preview")
|
|
315
|
+
fin = pm.get("final")
|
|
316
|
+
ratio = pm.get("ratio_vs_baseline")
|
|
317
|
+
|
|
318
|
+
lines.append("")
|
|
319
|
+
if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
|
|
320
|
+
lines.append(
|
|
321
|
+
"- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
|
|
322
|
+
)
|
|
323
|
+
lines.append("| Field | Value |")
|
|
324
|
+
lines.append("|-------|-------|")
|
|
325
|
+
lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
|
|
326
|
+
lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
|
|
327
|
+
|
|
328
|
+
if kind in {"accuracy", "vqa_accuracy"}:
|
|
329
|
+
lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
|
|
330
|
+
try:
|
|
331
|
+
base_pt = pm.get("baseline_point")
|
|
332
|
+
if isinstance(base_pt, int | float) and base_pt < 0.05:
|
|
333
|
+
lines.append("- Note: baseline < 5%; ratio suppressed; showing Δpp")
|
|
334
|
+
except Exception:
|
|
335
|
+
pass
|
|
336
|
+
else:
|
|
337
|
+
try:
|
|
338
|
+
lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
|
|
339
|
+
except Exception:
|
|
340
|
+
lines.append("| Ratio vs Baseline | N/A |")
|
|
341
|
+
lines.append("")
|
|
342
|
+
|
|
343
|
+
# Secondary metrics (informational)
|
|
344
|
+
try:
|
|
345
|
+
secs = certificate.get("secondary_metrics")
|
|
346
|
+
if isinstance(secs, list) and secs:
|
|
347
|
+
lines.append("## Secondary Metrics (informational)")
|
|
348
|
+
lines.append("")
|
|
349
|
+
lines.append("| Kind | Preview | Final | vs Baseline | CI |")
|
|
350
|
+
lines.append("|------|---------|-------|-------------|----|")
|
|
351
|
+
for m in secs:
|
|
352
|
+
if not isinstance(m, dict):
|
|
353
|
+
continue
|
|
354
|
+
k = m.get("kind", "?")
|
|
355
|
+
pv = _fmt_by_kind(m.get("preview"), str(k))
|
|
356
|
+
fv = _fmt_by_kind(m.get("final"), str(k))
|
|
357
|
+
rb = m.get("ratio_vs_baseline")
|
|
358
|
+
try:
|
|
359
|
+
rb_str = (
|
|
360
|
+
f"{float(rb):.3f}"
|
|
361
|
+
if (str(k).startswith("ppl"))
|
|
362
|
+
else _fmt_by_kind(rb, str(k))
|
|
363
|
+
)
|
|
364
|
+
except Exception:
|
|
365
|
+
rb_str = "N/A"
|
|
366
|
+
ci = m.get("display_ci") or m.get("ci")
|
|
367
|
+
if isinstance(ci, tuple | list) and len(ci) == 2:
|
|
368
|
+
ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
|
|
369
|
+
else:
|
|
370
|
+
ci_str = "–"
|
|
371
|
+
lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
|
|
372
|
+
lines.append("")
|
|
373
|
+
except Exception:
|
|
374
|
+
pass
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _append_policy_configuration_section(
|
|
378
|
+
lines: list[str], certificate: dict[str, Any]
|
|
379
|
+
) -> None:
|
|
380
|
+
resolved_policy = certificate.get("resolved_policy")
|
|
381
|
+
policy_provenance = certificate.get("policy_provenance", {}) or {}
|
|
382
|
+
has_prov = isinstance(policy_provenance, dict) and bool(policy_provenance)
|
|
383
|
+
has_resolved = isinstance(resolved_policy, dict) and bool(resolved_policy)
|
|
384
|
+
if not (has_prov or has_resolved):
|
|
385
|
+
return
|
|
386
|
+
|
|
387
|
+
lines.append("## Policy Configuration")
|
|
388
|
+
lines.append("")
|
|
389
|
+
|
|
390
|
+
tier = None
|
|
391
|
+
if has_prov:
|
|
392
|
+
tier = policy_provenance.get("tier")
|
|
393
|
+
if not tier:
|
|
394
|
+
tier = (certificate.get("auto", {}) or {}).get("tier")
|
|
395
|
+
digest_value = None
|
|
396
|
+
if has_prov:
|
|
397
|
+
digest_value = policy_provenance.get("policy_digest")
|
|
398
|
+
if not digest_value:
|
|
399
|
+
digest_value = (certificate.get("policy_digest", {}) or {}).get(
|
|
400
|
+
"thresholds_hash"
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
summary_parts: list[str] = []
|
|
404
|
+
if tier:
|
|
405
|
+
summary_parts.append(f"**Tier:** {tier}")
|
|
406
|
+
if digest_value:
|
|
407
|
+
summary_parts.append(f"**Digest:** `{_short_digest(str(digest_value))}`")
|
|
408
|
+
if summary_parts:
|
|
409
|
+
lines.append(" | ".join(summary_parts))
|
|
410
|
+
|
|
411
|
+
if has_prov:
|
|
412
|
+
overrides_list = policy_provenance.get("overrides") or []
|
|
413
|
+
if overrides_list:
|
|
414
|
+
lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
|
|
415
|
+
else:
|
|
416
|
+
lines.append("- **Overrides:** (none)")
|
|
417
|
+
if policy_provenance.get("resolved_at"):
|
|
418
|
+
lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
|
|
419
|
+
|
|
420
|
+
if has_resolved:
|
|
421
|
+
lines.append("")
|
|
422
|
+
lines.append("<details>")
|
|
423
|
+
lines.append("<summary>Resolved Policy YAML</summary>")
|
|
424
|
+
lines.append("")
|
|
425
|
+
lines.append("```yaml")
|
|
426
|
+
resolved_yaml = yaml.safe_dump(
|
|
427
|
+
resolved_policy, sort_keys=True, width=80, default_flow_style=False
|
|
428
|
+
).strip()
|
|
429
|
+
for line in resolved_yaml.splitlines():
|
|
430
|
+
lines.append(line)
|
|
431
|
+
lines.append("```")
|
|
432
|
+
lines.append("")
|
|
433
|
+
lines.append("</details>")
|
|
434
|
+
|
|
435
|
+
lines.append("")
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _append_dataset_and_provenance_section(
|
|
439
|
+
lines: list[str], certificate: dict[str, Any]
|
|
440
|
+
) -> None:
|
|
441
|
+
dataset = certificate.get("dataset", {}) or {}
|
|
442
|
+
provenance_info = certificate.get("provenance", {}) or {}
|
|
443
|
+
|
|
444
|
+
has_dataset = isinstance(dataset, dict) and bool(dataset)
|
|
445
|
+
has_provenance = isinstance(provenance_info, dict) and bool(provenance_info)
|
|
446
|
+
if not (has_dataset or has_provenance):
|
|
447
|
+
return
|
|
448
|
+
|
|
449
|
+
lines.append("## Dataset and Provenance")
|
|
450
|
+
lines.append("")
|
|
451
|
+
|
|
452
|
+
if has_dataset:
|
|
453
|
+
prov = dataset.get("provider") or "unknown"
|
|
454
|
+
lines.append(f"- **Provider:** {prov}")
|
|
455
|
+
try:
|
|
456
|
+
seq_len_val = (
|
|
457
|
+
int(dataset.get("seq_len"))
|
|
458
|
+
if isinstance(dataset.get("seq_len"), int | float)
|
|
459
|
+
else dataset.get("seq_len")
|
|
460
|
+
)
|
|
461
|
+
except Exception: # pragma: no cover - defensive
|
|
462
|
+
seq_len_val = dataset.get("seq_len")
|
|
463
|
+
if seq_len_val is not None:
|
|
464
|
+
lines.append(f"- **Sequence Length:** {seq_len_val}")
|
|
465
|
+
windows_blk = (
|
|
466
|
+
dataset.get("windows", {})
|
|
467
|
+
if isinstance(dataset.get("windows"), dict)
|
|
468
|
+
else {}
|
|
469
|
+
)
|
|
470
|
+
win_prev = windows_blk.get("preview")
|
|
471
|
+
win_final = windows_blk.get("final")
|
|
472
|
+
if win_prev is not None and win_final is not None:
|
|
473
|
+
lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
|
|
474
|
+
if windows_blk.get("seed") is not None:
|
|
475
|
+
lines.append(f"- **Seed:** {windows_blk.get('seed')}")
|
|
476
|
+
hash_blk = (
|
|
477
|
+
dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
|
|
478
|
+
)
|
|
479
|
+
if hash_blk.get("preview_tokens") is not None:
|
|
480
|
+
lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
|
|
481
|
+
if hash_blk.get("final_tokens") is not None:
|
|
482
|
+
lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
|
|
483
|
+
if hash_blk.get("total_tokens") is not None:
|
|
484
|
+
lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
|
|
485
|
+
if hash_blk.get("dataset"):
|
|
486
|
+
lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
|
|
487
|
+
tokenizer = dataset.get("tokenizer", {})
|
|
488
|
+
if isinstance(tokenizer, dict) and (
|
|
489
|
+
tokenizer.get("name") or tokenizer.get("hash")
|
|
490
|
+
):
|
|
491
|
+
vocab_size = tokenizer.get("vocab_size")
|
|
492
|
+
vocab_suffix = (
|
|
493
|
+
f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
|
|
494
|
+
)
|
|
495
|
+
lines.append(
|
|
496
|
+
f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
|
|
497
|
+
)
|
|
498
|
+
if tokenizer.get("hash"):
|
|
499
|
+
lines.append(f" - Hash: {tokenizer['hash']}")
|
|
500
|
+
lines.append(
|
|
501
|
+
f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
|
|
502
|
+
)
|
|
503
|
+
if tokenizer.get("pad_token") is not None:
|
|
504
|
+
lines.append(f" - PAD: {tokenizer.get('pad_token')}")
|
|
505
|
+
if tokenizer.get("add_prefix_space") is not None:
|
|
506
|
+
lines.append(
|
|
507
|
+
f" - add_prefix_space: {tokenizer.get('add_prefix_space')}"
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
if has_provenance:
|
|
511
|
+
baseline_info = provenance_info.get("baseline", {}) or {}
|
|
512
|
+
edited_info = provenance_info.get("edited", {}) or {}
|
|
513
|
+
|
|
514
|
+
if baseline_info or edited_info:
|
|
515
|
+
lines.append("")
|
|
516
|
+
if baseline_info:
|
|
517
|
+
lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
|
|
518
|
+
if baseline_info.get("report_hash"):
|
|
519
|
+
lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
|
|
520
|
+
if baseline_info.get("report_path"):
|
|
521
|
+
lines.append(f" - Report Path: {baseline_info.get('report_path')}")
|
|
522
|
+
if edited_info:
|
|
523
|
+
lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
|
|
524
|
+
if edited_info.get("report_hash"):
|
|
525
|
+
lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
|
|
526
|
+
if edited_info.get("report_path"):
|
|
527
|
+
lines.append(f" - Report Path: {edited_info.get('report_path')}")
|
|
528
|
+
|
|
529
|
+
provider_digest = provenance_info.get("provider_digest")
|
|
530
|
+
if isinstance(provider_digest, dict) and provider_digest:
|
|
531
|
+
ids_d = provider_digest.get("ids_sha256")
|
|
532
|
+
tok_d = provider_digest.get("tokenizer_sha256")
|
|
533
|
+
mask_d = provider_digest.get("masking_sha256")
|
|
534
|
+
|
|
535
|
+
lines.append("- **Provider Digest:**")
|
|
536
|
+
if tok_d:
|
|
537
|
+
lines.append(
|
|
538
|
+
f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
|
|
539
|
+
)
|
|
540
|
+
if ids_d:
|
|
541
|
+
lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
|
|
542
|
+
if mask_d:
|
|
543
|
+
lines.append(
|
|
544
|
+
f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
|
|
545
|
+
)
|
|
546
|
+
|
|
547
|
+
try:
|
|
548
|
+
conf = certificate.get("confidence", {}) or {}
|
|
549
|
+
if isinstance(conf, dict) and conf.get("label"):
|
|
550
|
+
lines.append(f"- **Confidence:** {conf.get('label')}")
|
|
551
|
+
except Exception:
|
|
552
|
+
pass
|
|
553
|
+
|
|
554
|
+
try:
|
|
555
|
+
pd = certificate.get("policy_digest", {}) or {}
|
|
556
|
+
if isinstance(pd, dict) and pd:
|
|
557
|
+
pv = pd.get("policy_version")
|
|
558
|
+
th = pd.get("thresholds_hash")
|
|
559
|
+
if pv:
|
|
560
|
+
lines.append(f"- **Policy Version:** {pv}")
|
|
561
|
+
if isinstance(th, str) and th:
|
|
562
|
+
short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
|
|
563
|
+
lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
|
|
564
|
+
if pd.get("changed"):
|
|
565
|
+
lines.append("- Note: policy changed")
|
|
566
|
+
except Exception:
|
|
567
|
+
pass
|
|
568
|
+
|
|
569
|
+
lines.append("")
|
|
570
|
+
|
|
571
|
+
|
|
116
572
|
def _fmt_by_kind(x: Any, k: str) -> str:
|
|
117
573
|
try:
|
|
118
574
|
xv = float(x)
|
|
@@ -275,11 +731,12 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
275
731
|
if not validate_certificate(certificate):
|
|
276
732
|
raise ValueError("Invalid certificate structure")
|
|
277
733
|
|
|
278
|
-
lines = []
|
|
734
|
+
lines: list[str] = []
|
|
735
|
+
appendix_lines: list[str] = []
|
|
279
736
|
edit_name = str(certificate.get("edit_name") or "").lower()
|
|
280
737
|
|
|
281
738
|
# Header
|
|
282
|
-
lines.append("# InvarLock
|
|
739
|
+
lines.append("# InvarLock Evaluation Certificate")
|
|
283
740
|
lines.append("")
|
|
284
741
|
lines.append(
|
|
285
742
|
"> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
|
|
@@ -291,6 +748,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
291
748
|
lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
|
|
292
749
|
lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
|
|
293
750
|
lines.append("")
|
|
751
|
+
lines.append(
|
|
752
|
+
"> Full evidence: see [`evaluation.cert.json`](evaluation.cert.json) for complete provenance, digests, and raw measurements."
|
|
753
|
+
)
|
|
754
|
+
lines.append("")
|
|
294
755
|
|
|
295
756
|
plugins = certificate.get("plugins", {})
|
|
296
757
|
if isinstance(plugins, dict) and plugins:
|
|
@@ -314,7 +775,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
314
775
|
]
|
|
315
776
|
if guard_entries:
|
|
316
777
|
lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
|
|
317
|
-
|
|
778
|
+
lines.append("")
|
|
318
779
|
|
|
319
780
|
# Executive Summary with validation status (canonical, from console block)
|
|
320
781
|
lines.append("## Executive Summary")
|
|
@@ -354,6 +815,25 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
354
815
|
pass
|
|
355
816
|
lines.append("")
|
|
356
817
|
|
|
818
|
+
dashboard = _render_executive_dashboard(certificate)
|
|
819
|
+
if dashboard:
|
|
820
|
+
lines.extend(dashboard.splitlines())
|
|
821
|
+
lines.append("")
|
|
822
|
+
|
|
823
|
+
lines.append("## Contents")
|
|
824
|
+
lines.append("")
|
|
825
|
+
lines.append("- [Safety Dashboard](#safety-dashboard)")
|
|
826
|
+
lines.append("- [Quality Gates](#quality-gates)")
|
|
827
|
+
lines.append("- [Safety Check Details](#safety-check-details)")
|
|
828
|
+
lines.append("- [Primary Metric](#primary-metric)")
|
|
829
|
+
lines.append("- [Guard Observability](#guard-observability)")
|
|
830
|
+
lines.append("- [Model Information](#model-information)")
|
|
831
|
+
lines.append("- [Dataset and Provenance](#dataset-and-provenance)")
|
|
832
|
+
lines.append("- [Policy Configuration](#policy-configuration)")
|
|
833
|
+
lines.append("- [Appendix](#appendix)")
|
|
834
|
+
lines.append("- [Certificate Integrity](#certificate-integrity)")
|
|
835
|
+
lines.append("")
|
|
836
|
+
|
|
357
837
|
# Validation table with canonical gates (mirrors console allow-list)
|
|
358
838
|
lines.append("## Quality Gates")
|
|
359
839
|
lines.append("")
|
|
@@ -410,6 +890,31 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
410
890
|
)
|
|
411
891
|
)
|
|
412
892
|
status = "✅ PASS" if ok else "❌ FAIL"
|
|
893
|
+
drift_min = 0.95
|
|
894
|
+
drift_max = 1.05
|
|
895
|
+
try:
|
|
896
|
+
drift_band = (
|
|
897
|
+
pm_block.get("drift_band") if isinstance(pm_block, dict) else None
|
|
898
|
+
)
|
|
899
|
+
if isinstance(drift_band, dict):
|
|
900
|
+
lo = drift_band.get("min")
|
|
901
|
+
hi = drift_band.get("max")
|
|
902
|
+
if isinstance(lo, int | float) and isinstance(hi, int | float):
|
|
903
|
+
lo_f = float(lo)
|
|
904
|
+
hi_f = float(hi)
|
|
905
|
+
if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
|
|
906
|
+
drift_min = lo_f
|
|
907
|
+
drift_max = hi_f
|
|
908
|
+
elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
|
|
909
|
+
lo_raw, hi_raw = drift_band[0], drift_band[1]
|
|
910
|
+
if isinstance(lo_raw, int | float) and isinstance(hi_raw, int | float):
|
|
911
|
+
lo_f = float(lo_raw)
|
|
912
|
+
hi_f = float(hi_raw)
|
|
913
|
+
if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
|
|
914
|
+
drift_min = lo_f
|
|
915
|
+
drift_max = hi_f
|
|
916
|
+
except Exception:
|
|
917
|
+
pass
|
|
413
918
|
# Compute drift from PM preview/final when available
|
|
414
919
|
try:
|
|
415
920
|
pv = (
|
|
@@ -430,8 +935,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
430
935
|
except Exception:
|
|
431
936
|
drift = float("nan")
|
|
432
937
|
measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
|
|
938
|
+
band_label = f"{drift_min:.2f}–{drift_max:.2f}x"
|
|
433
939
|
lines.append(
|
|
434
|
-
f"| Preview Final Drift Acceptable | {status} | {measured} |
|
|
940
|
+
f"| Preview Final Drift Acceptable | {status} | {measured} | {band_label} | point | Final/Preview ratio stability |"
|
|
435
941
|
)
|
|
436
942
|
|
|
437
943
|
# Helper to emit Guard Overhead Acceptable row (only when evaluated)
|
|
@@ -468,9 +974,70 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
468
974
|
f"| Guard Overhead Acceptable | {status} | {measured} | ≤ +{threshold_pct:.1f}% | point | Guarded vs bare PM overhead |"
|
|
469
975
|
)
|
|
470
976
|
|
|
977
|
+
def _emit_pm_tail_gate_row() -> None:
    """Append the "Primary Metric Tail" row to the gate table.

    Reads ``certificate["primary_metric_tail"]`` and appends one table row
    to ``lines``; both names come from the enclosing scope. Nothing is
    emitted when that entry is missing, empty, or not a dict.
    """

    def _finite(value: Any) -> float | None:
        # Only real, finite numbers are rendered.
        if not isinstance(value, int | float):
            return None
        try:
            number = float(value)
        except OverflowError:  # int too large to represent as a float
            return None
        return number if math.isfinite(number) else None

    pm_tail = certificate.get("primary_metric_tail", {}) or {}
    if not isinstance(pm_tail, dict) or not pm_tail:
        return

    evaluated = bool(pm_tail.get("evaluated", False))
    mode = str(pm_tail.get("mode", "warn") or "warn").strip().lower()
    passed = bool(pm_tail.get("passed", True))

    if not evaluated:
        status = "🛈 INFO"
    elif passed:
        status = "✅ PASS"
    elif mode == "fail":
        status = "❌ FAIL"
    else:
        # Any non-passing result outside "fail" mode is a warning. The
        # ``warned`` flag was previously consulted here but selected the
        # same label in both cases, so it is no longer read.
        status = "⚠️ WARN"

    policy = pm_tail.get("policy", {}) if isinstance(pm_tail.get("policy"), dict) else {}
    stats = pm_tail.get("stats", {}) if isinstance(pm_tail.get("stats"), dict) else {}

    try:
        qf = float(policy.get("quantile", 0.95))
    except (TypeError, ValueError, OverflowError):
        qf = 0.95
    # Clamp to [0, 1] so the derived percentile label stays within 0..100.
    qf = max(0.0, min(1.0, qf))
    percentile = int(round(100.0 * qf))
    q_key = f"q{percentile}"
    q_name = f"P{percentile}"

    measured_parts: list[str] = []
    q_val = _finite(stats.get(q_key))
    if q_val is not None:
        measured_parts.append(f"{q_name}={q_val:.3f}")
    mass_val = _finite(stats.get("tail_mass"))
    if mass_val is not None:
        measured_parts.append(f"mass={mass_val:.3f}")
    measured = ", ".join(measured_parts) if measured_parts else "N/A"

    thr_parts: list[str] = []
    qmax = _finite(policy.get("quantile_max"))
    if qmax is not None:
        thr_parts.append(f"{q_name}≤{qmax:.3f}")
    mmax = _finite(policy.get("mass_max"))
    if mmax is not None:
        thr_parts.append(f"mass≤{mmax:.3f}")
    # Epsilon comes from the policy when present, else from the stats.
    eps = _finite(policy.get("epsilon", stats.get("epsilon")))
    if eps is not None:
        thr_parts.append(f"ε={eps:.1e}")
    threshold = "; ".join(thr_parts) if thr_parts else "policy"

    lines.append(
        f"| Primary Metric Tail | {status} | {measured} | {threshold} | {q_name.lower()} | Tail regression vs baseline (ΔlogNLL) |"
    )
|
|
1036
|
+
|
|
471
1037
|
# Emit canonical gate rows
|
|
472
1038
|
if has_pm:
|
|
473
1039
|
_emit_pm_gate_row()
|
|
1040
|
+
_emit_pm_tail_gate_row()
|
|
474
1041
|
_emit_drift_gate_row()
|
|
475
1042
|
_emit_overhead_gate_row()
|
|
476
1043
|
|
|
@@ -555,14 +1122,39 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
555
1122
|
or overlap_frac is not None
|
|
556
1123
|
):
|
|
557
1124
|
lines.append("")
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
1125
|
+
parts: list[str] = []
|
|
1126
|
+
if paired_windows is not None:
|
|
1127
|
+
try:
|
|
1128
|
+
parts.append(f"{int(paired_windows)} windows")
|
|
1129
|
+
except Exception:
|
|
1130
|
+
parts.append(f"windows={paired_windows}")
|
|
1131
|
+
if isinstance(match_frac, int | float) and math.isfinite(float(match_frac)):
|
|
1132
|
+
parts.append(f"{float(match_frac) * 100.0:.1f}% match")
|
|
1133
|
+
elif match_frac is not None:
|
|
1134
|
+
parts.append(f"match={match_frac}")
|
|
1135
|
+
if isinstance(overlap_frac, int | float) and math.isfinite(
|
|
1136
|
+
float(overlap_frac)
|
|
1137
|
+
):
|
|
1138
|
+
parts.append(f"{float(overlap_frac) * 100.0:.1f}% overlap")
|
|
1139
|
+
elif overlap_frac is not None:
|
|
1140
|
+
parts.append(f"overlap={overlap_frac}")
|
|
1141
|
+
lines.append(f"✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
|
|
561
1142
|
if isinstance(bootstrap, dict):
|
|
562
1143
|
reps = bootstrap.get("replicates")
|
|
563
1144
|
bseed = bootstrap.get("seed")
|
|
564
1145
|
if reps is not None or bseed is not None:
|
|
565
|
-
|
|
1146
|
+
bits: list[str] = []
|
|
1147
|
+
if reps is not None:
|
|
1148
|
+
try:
|
|
1149
|
+
bits.append(f"{int(reps)} replicates")
|
|
1150
|
+
except Exception:
|
|
1151
|
+
bits.append(f"replicates={reps}")
|
|
1152
|
+
if bseed is not None:
|
|
1153
|
+
try:
|
|
1154
|
+
bits.append(f"seed={int(bseed)}")
|
|
1155
|
+
except Exception:
|
|
1156
|
+
bits.append(f"seed={bseed}")
|
|
1157
|
+
lines.append(f"✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
|
|
566
1158
|
# Optional: show log-space paired Δ CI next to ratio CI for clarity
|
|
567
1159
|
delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
|
|
568
1160
|
"ppl", {}
|
|
@@ -572,7 +1164,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
572
1164
|
and len(delta_ci) == 2
|
|
573
1165
|
and all(isinstance(x, int | float) for x in delta_ci)
|
|
574
1166
|
):
|
|
575
|
-
lines.append(f"
|
|
1167
|
+
lines.append(f"🛈 Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
|
|
576
1168
|
except Exception:
|
|
577
1169
|
pass
|
|
578
1170
|
|
|
@@ -593,116 +1185,179 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
593
1185
|
|
|
594
1186
|
lines.append("")
|
|
595
1187
|
|
|
1188
|
+
_append_primary_metric_section(lines, certificate)
|
|
1189
|
+
|
|
596
1190
|
# Guard observability snapshots
|
|
597
1191
|
lines.append("## Guard Observability")
|
|
598
1192
|
lines.append("")
|
|
599
1193
|
|
|
600
1194
|
spectral_info = certificate.get("spectral", {}) or {}
|
|
601
1195
|
if spectral_info:
|
|
602
|
-
lines.append("### Spectral Guard")
|
|
1196
|
+
lines.append("### Spectral Guard Summary")
|
|
603
1197
|
lines.append("")
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
spec_sigma = spectral_info.get("sigma_quantile")
|
|
616
|
-
spec_deadband = spectral_info.get("deadband")
|
|
617
|
-
spec_max_caps = spectral_info.get("max_caps")
|
|
618
|
-
summary_yaml = {
|
|
619
|
-
"sigma_quantile": float(spec_sigma)
|
|
620
|
-
if isinstance(spec_sigma, int | float)
|
|
621
|
-
else None,
|
|
622
|
-
"deadband": float(spec_deadband)
|
|
623
|
-
if isinstance(spec_deadband, int | float)
|
|
624
|
-
else None,
|
|
625
|
-
"max_caps": int(spec_max_caps)
|
|
626
|
-
if isinstance(spec_max_caps, int | float)
|
|
627
|
-
else None,
|
|
628
|
-
}
|
|
629
|
-
# Drop Nones from summary
|
|
630
|
-
summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
|
|
631
|
-
if summary_yaml:
|
|
632
|
-
lines.append("- **Spectral Summary:**")
|
|
633
|
-
lines.append(" ```yaml")
|
|
634
|
-
for line in (
|
|
635
|
-
yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
|
|
636
|
-
.strip()
|
|
637
|
-
.splitlines()
|
|
638
|
-
):
|
|
639
|
-
lines.append(f" {line}")
|
|
640
|
-
lines.append(" ```")
|
|
1198
|
+
lines.append("| Metric | Value | Status |")
|
|
1199
|
+
lines.append("|--------|-------|--------|")
|
|
1200
|
+
|
|
1201
|
+
spectral_ok = bool(validation.get("spectral_stable", False))
|
|
1202
|
+
caps_applied = spectral_info.get("caps_applied")
|
|
1203
|
+
max_caps = spectral_info.get("max_caps")
|
|
1204
|
+
caps_val = (
|
|
1205
|
+
f"{caps_applied}/{max_caps}"
|
|
1206
|
+
if caps_applied is not None and max_caps is not None
|
|
1207
|
+
else "-"
|
|
1208
|
+
)
|
|
641
1209
|
lines.append(
|
|
642
|
-
f"
|
|
1210
|
+
f"| Caps Applied | {caps_val} | {'✅ OK' if spectral_ok else '❌ FAIL'} |"
|
|
643
1211
|
)
|
|
1212
|
+
|
|
644
1213
|
summary = spectral_info.get("summary", {}) or {}
|
|
645
|
-
|
|
646
|
-
|
|
1214
|
+
caps_exceeded = summary.get("caps_exceeded")
|
|
1215
|
+
if caps_exceeded is not None:
|
|
1216
|
+
cap_status = "✅ OK" if not bool(caps_exceeded) else "⚠️ WARN"
|
|
1217
|
+
lines.append(f"| Caps Exceeded | {caps_exceeded} | {cap_status} |")
|
|
1218
|
+
|
|
1219
|
+
top_scores = spectral_info.get("top_z_scores") or {}
|
|
1220
|
+
max_family: str | None = None
|
|
1221
|
+
max_module: str | None = None
|
|
1222
|
+
max_abs_z: float | None = None
|
|
1223
|
+
if isinstance(top_scores, dict):
|
|
1224
|
+
for family, entries in top_scores.items():
|
|
1225
|
+
if not isinstance(entries, list):
|
|
1226
|
+
continue
|
|
1227
|
+
for entry in entries:
|
|
1228
|
+
if not isinstance(entry, dict):
|
|
1229
|
+
continue
|
|
1230
|
+
z_val = entry.get("z")
|
|
1231
|
+
if not (
|
|
1232
|
+
isinstance(z_val, int | float) and math.isfinite(float(z_val))
|
|
1233
|
+
):
|
|
1234
|
+
continue
|
|
1235
|
+
z_abs = abs(float(z_val))
|
|
1236
|
+
if max_abs_z is None or z_abs > max_abs_z:
|
|
1237
|
+
max_abs_z = z_abs
|
|
1238
|
+
max_family = str(family)
|
|
1239
|
+
max_module = (
|
|
1240
|
+
str(entry.get("module")) if entry.get("module") else None
|
|
1241
|
+
)
|
|
1242
|
+
|
|
647
1243
|
family_caps = spectral_info.get("family_caps") or {}
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
kappa =
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
1244
|
+
kappa = None
|
|
1245
|
+
if max_family and isinstance(family_caps, dict):
|
|
1246
|
+
try:
|
|
1247
|
+
kappa = (family_caps.get(max_family, {}) or {}).get("kappa")
|
|
1248
|
+
except Exception:
|
|
1249
|
+
kappa = None
|
|
1250
|
+
kappa_f = (
|
|
1251
|
+
float(kappa)
|
|
1252
|
+
if isinstance(kappa, int | float) and math.isfinite(float(kappa))
|
|
1253
|
+
else None
|
|
1254
|
+
)
|
|
1255
|
+
|
|
1256
|
+
if max_abs_z is not None:
|
|
1257
|
+
max_val = f"{max_abs_z:.3f}"
|
|
1258
|
+
if max_family:
|
|
1259
|
+
max_val += f" ({max_family})"
|
|
1260
|
+
if max_module:
|
|
1261
|
+
max_val += f" – {max_module}"
|
|
1262
|
+
if kappa_f is None:
|
|
1263
|
+
max_status = "🛈 No κ"
|
|
1264
|
+
elif max_abs_z <= kappa_f:
|
|
1265
|
+
max_status = f"✅ Within κ={kappa_f:.3f}"
|
|
1266
|
+
else:
|
|
1267
|
+
max_status = f"❌ Exceeds κ={kappa_f:.3f}"
|
|
1268
|
+
lines.append(f"| Max |z| | {max_val} | {max_status} |")
|
|
1269
|
+
|
|
1270
|
+
mt_info = spectral_info.get("multiple_testing", {}) or {}
|
|
1271
|
+
if isinstance(mt_info, dict) and mt_info:
|
|
1272
|
+
mt_method = mt_info.get("method")
|
|
1273
|
+
mt_alpha = mt_info.get("alpha")
|
|
1274
|
+
mt_m = mt_info.get("m")
|
|
1275
|
+
parts: list[str] = []
|
|
1276
|
+
if mt_method:
|
|
1277
|
+
parts.append(f"method={mt_method}")
|
|
1278
|
+
if isinstance(mt_alpha, int | float) and math.isfinite(float(mt_alpha)):
|
|
1279
|
+
parts.append(f"α={float(mt_alpha):.3g}")
|
|
1280
|
+
if isinstance(mt_m, int | float) and math.isfinite(float(mt_m)):
|
|
1281
|
+
parts.append(f"m={int(mt_m)}")
|
|
1282
|
+
lines.append(
|
|
1283
|
+
f"| Multiple Testing | {', '.join(parts) if parts else '—'} | 🛈 INFO |"
|
|
1284
|
+
)
|
|
1285
|
+
|
|
1286
|
+
lines.append("")
|
|
1287
|
+
|
|
1288
|
+
caps_by_family = spectral_info.get("caps_applied_by_family") or {}
|
|
660
1289
|
quantiles = spectral_info.get("family_z_quantiles") or {}
|
|
661
|
-
if
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
1290
|
+
if any(
|
|
1291
|
+
bool(x)
|
|
1292
|
+
for x in (caps_by_family, quantiles, family_caps, top_scores)
|
|
1293
|
+
if isinstance(x, dict)
|
|
1294
|
+
):
|
|
1295
|
+
lines.append("<details>")
|
|
1296
|
+
lines.append("<summary>Per-family details</summary>")
|
|
1297
|
+
lines.append("")
|
|
1298
|
+
lines.append("| Family | κ | q95 | Max |z| | Violations |")
|
|
1299
|
+
lines.append("|--------|---|-----|--------|------------|")
|
|
1300
|
+
|
|
1301
|
+
families: set[str] = set()
|
|
1302
|
+
for block in (caps_by_family, quantiles, family_caps, top_scores):
|
|
1303
|
+
if isinstance(block, dict):
|
|
1304
|
+
families.update(str(k) for k in block.keys())
|
|
1305
|
+
|
|
1306
|
+
for family in sorted(families):
|
|
1307
|
+
kappa = None
|
|
1308
|
+
if isinstance(family_caps, dict):
|
|
1309
|
+
kappa = (family_caps.get(family, {}) or {}).get("kappa")
|
|
1310
|
+
kappa_str = (
|
|
1311
|
+
f"{float(kappa):.3f}"
|
|
1312
|
+
if isinstance(kappa, int | float) and math.isfinite(float(kappa))
|
|
1313
|
+
else "-"
|
|
1314
|
+
)
|
|
1315
|
+
|
|
1316
|
+
q95 = None
|
|
1317
|
+
max_z = None
|
|
1318
|
+
if isinstance(quantiles, dict):
|
|
1319
|
+
stats = quantiles.get(family) or {}
|
|
1320
|
+
if isinstance(stats, dict):
|
|
1321
|
+
q95 = stats.get("q95")
|
|
1322
|
+
max_z = stats.get("max")
|
|
669
1323
|
q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
|
|
670
|
-
q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
|
|
671
1324
|
max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
|
|
672
|
-
|
|
1325
|
+
|
|
1326
|
+
violations = None
|
|
1327
|
+
if isinstance(caps_by_family, dict):
|
|
1328
|
+
violations = caps_by_family.get(family)
|
|
1329
|
+
v_str = (
|
|
1330
|
+
str(int(violations)) if isinstance(violations, int | float) else "0"
|
|
1331
|
+
)
|
|
1332
|
+
|
|
673
1333
|
lines.append(
|
|
674
|
-
f"| {family} | {
|
|
1334
|
+
f"| {family} | {kappa_str} | {q95_str} | {max_str} | {v_str} |"
|
|
675
1335
|
)
|
|
1336
|
+
|
|
1337
|
+
if isinstance(top_scores, dict) and top_scores:
|
|
1338
|
+
lines.append("")
|
|
1339
|
+
lines.append("Top |z| per family:")
|
|
1340
|
+
for family in sorted(top_scores.keys()):
|
|
1341
|
+
entries = top_scores[family]
|
|
1342
|
+
if not isinstance(entries, list) or not entries:
|
|
1343
|
+
continue
|
|
1344
|
+
formatted_entries = []
|
|
1345
|
+
for entry in entries:
|
|
1346
|
+
if not isinstance(entry, dict):
|
|
1347
|
+
continue
|
|
1348
|
+
module_name = entry.get("module", "unknown")
|
|
1349
|
+
z_val = entry.get("z")
|
|
1350
|
+
if isinstance(z_val, int | float) and math.isfinite(
|
|
1351
|
+
float(z_val)
|
|
1352
|
+
):
|
|
1353
|
+
z_str = f"{z_val:.3f}"
|
|
1354
|
+
else:
|
|
1355
|
+
z_str = "n/a"
|
|
1356
|
+
formatted_entries.append(f"{module_name} (|z|={z_str})")
|
|
1357
|
+
lines.append(f"- {family}: {', '.join(formatted_entries)}")
|
|
1358
|
+
|
|
676
1359
|
lines.append("")
|
|
677
|
-
|
|
678
|
-
if policy_caps:
|
|
679
|
-
lines.append("- **Family κ (policy):**")
|
|
680
|
-
lines.append(" ```yaml")
|
|
681
|
-
caps_yaml = (
|
|
682
|
-
yaml.safe_dump(policy_caps, sort_keys=True, width=70)
|
|
683
|
-
.strip()
|
|
684
|
-
.splitlines()
|
|
685
|
-
)
|
|
686
|
-
for line in caps_yaml:
|
|
687
|
-
lines.append(f" {line}")
|
|
688
|
-
lines.append(" ```")
|
|
689
|
-
top_scores = spectral_info.get("top_z_scores") or {}
|
|
690
|
-
if top_scores:
|
|
691
|
-
lines.append("Top |z| per family:")
|
|
692
|
-
for family in sorted(top_scores.keys()):
|
|
693
|
-
entries = top_scores[family]
|
|
694
|
-
if not entries:
|
|
695
|
-
continue
|
|
696
|
-
formatted_entries = []
|
|
697
|
-
for entry in entries:
|
|
698
|
-
module_name = entry.get("module", "unknown")
|
|
699
|
-
z_val = entry.get("z")
|
|
700
|
-
if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
|
|
701
|
-
z_str = f"{z_val:.3f}"
|
|
702
|
-
else:
|
|
703
|
-
z_str = "n/a"
|
|
704
|
-
formatted_entries.append(f"{module_name} (|z|={z_str})")
|
|
705
|
-
lines.append(f"- {family}: {', '.join(formatted_entries)}")
|
|
1360
|
+
lines.append("</details>")
|
|
706
1361
|
lines.append("")
|
|
707
1362
|
|
|
708
1363
|
rmt_info = certificate.get("rmt", {}) or {}
|
|
@@ -710,7 +1365,18 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
710
1365
|
lines.append("### RMT Guard")
|
|
711
1366
|
lines.append("")
|
|
712
1367
|
families = rmt_info.get("families") or {}
|
|
1368
|
+
stable = bool(rmt_info.get("stable", True))
|
|
1369
|
+
status = "✅ OK" if stable else "❌ FAIL"
|
|
1370
|
+
delta_total = rmt_info.get("delta_total")
|
|
1371
|
+
if isinstance(delta_total, int):
|
|
1372
|
+
lines.append(f"- Δ total: {delta_total:+d}")
|
|
1373
|
+
lines.append(f"- Status: {status}")
|
|
1374
|
+
lines.append(f"- Families: {len(families)}")
|
|
713
1375
|
if families:
|
|
1376
|
+
lines.append("")
|
|
1377
|
+
lines.append("<details>")
|
|
1378
|
+
lines.append("<summary>RMT family details</summary>")
|
|
1379
|
+
lines.append("")
|
|
714
1380
|
lines.append("| Family | ε_f | Bare | Guarded | Δ |")
|
|
715
1381
|
lines.append("|--------|-----|------|---------|---|")
|
|
716
1382
|
for family, data in families.items():
|
|
@@ -740,12 +1406,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
740
1406
|
f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
|
|
741
1407
|
)
|
|
742
1408
|
lines.append("")
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
lines.append(
|
|
747
|
-
lines.append(f"- Stable: {rmt_info.get('stable', True)}")
|
|
748
|
-
lines.append("")
|
|
1409
|
+
lines.append("</details>")
|
|
1410
|
+
lines.append("")
|
|
1411
|
+
else:
|
|
1412
|
+
lines.append("")
|
|
749
1413
|
|
|
750
1414
|
guard_overhead_info = certificate.get("guard_overhead", {}) or {}
|
|
751
1415
|
if guard_overhead_info:
|
|
@@ -802,21 +1466,21 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
802
1466
|
inference_sources = compression_diag.get("inference_source") or {}
|
|
803
1467
|
inference_log = compression_diag.get("inference_log") or []
|
|
804
1468
|
if inference_flags or inference_sources or inference_log:
|
|
805
|
-
|
|
806
|
-
|
|
1469
|
+
appendix_lines.append("### Inference Diagnostics")
|
|
1470
|
+
appendix_lines.append("")
|
|
807
1471
|
if inference_flags:
|
|
808
|
-
|
|
1472
|
+
appendix_lines.append("- **Fields Inferred:**")
|
|
809
1473
|
for field, flag in inference_flags.items():
|
|
810
|
-
|
|
1474
|
+
appendix_lines.append(f" - {field}: {'yes' if flag else 'no'}")
|
|
811
1475
|
if inference_sources:
|
|
812
|
-
|
|
1476
|
+
appendix_lines.append("- **Sources:**")
|
|
813
1477
|
for field, source in inference_sources.items():
|
|
814
|
-
|
|
1478
|
+
appendix_lines.append(f" - {field}: {source}")
|
|
815
1479
|
if inference_log:
|
|
816
|
-
|
|
1480
|
+
appendix_lines.append("- **Inference Log:**")
|
|
817
1481
|
for entry in inference_log:
|
|
818
|
-
|
|
819
|
-
|
|
1482
|
+
appendix_lines.append(f" - {entry}")
|
|
1483
|
+
appendix_lines.append("")
|
|
820
1484
|
|
|
821
1485
|
# Model and Configuration
|
|
822
1486
|
lines.append("## Model Information")
|
|
@@ -845,28 +1509,48 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
845
1509
|
if invarlock_version:
|
|
846
1510
|
lines.append(f"- **InvarLock Version:** {invarlock_version}")
|
|
847
1511
|
env_flags = meta.get("env_flags")
|
|
848
|
-
if isinstance(env_flags, dict) and env_flags:
|
|
849
|
-
lines.append("- **Env Flags:**")
|
|
850
|
-
lines.append(" ```yaml")
|
|
851
|
-
for k, v in env_flags.items():
|
|
852
|
-
lines.append(f" {k}: {v}")
|
|
853
|
-
lines.append(" ```")
|
|
854
|
-
# Determinism flags (if present)
|
|
855
1512
|
cuda_flags = meta.get("cuda_flags")
|
|
1513
|
+
|
|
1514
|
+
# Compressed determinism/environment summary for readability
|
|
1515
|
+
det_parts: list[str] = []
|
|
1516
|
+
for label, keys in (
|
|
1517
|
+
("torch_det", ("torch_deterministic_algorithms", "deterministic_algorithms")),
|
|
1518
|
+
("cudnn_det", ("cudnn_deterministic",)),
|
|
1519
|
+
("cudnn_bench", ("cudnn_benchmark",)),
|
|
1520
|
+
("tf32_matmul", ("cuda_matmul_allow_tf32",)),
|
|
1521
|
+
("tf32_cudnn", ("cudnn_allow_tf32",)),
|
|
1522
|
+
("cublas_ws", ("CUBLAS_WORKSPACE_CONFIG",)),
|
|
1523
|
+
):
|
|
1524
|
+
val = None
|
|
1525
|
+
for key in keys:
|
|
1526
|
+
if isinstance(env_flags, dict) and env_flags.get(key) is not None:
|
|
1527
|
+
val = env_flags.get(key)
|
|
1528
|
+
break
|
|
1529
|
+
if isinstance(cuda_flags, dict) and cuda_flags.get(key) is not None:
|
|
1530
|
+
val = cuda_flags.get(key)
|
|
1531
|
+
break
|
|
1532
|
+
if val is not None:
|
|
1533
|
+
det_parts.append(f"{label}={val}")
|
|
1534
|
+
if det_parts:
|
|
1535
|
+
lines.append(f"- **Determinism:** {', '.join(det_parts)}")
|
|
1536
|
+
|
|
1537
|
+
full_flags: dict[str, Any] = {}
|
|
1538
|
+
if isinstance(env_flags, dict) and env_flags:
|
|
1539
|
+
full_flags["env_flags"] = env_flags
|
|
856
1540
|
if isinstance(cuda_flags, dict) and cuda_flags:
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
):
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
1541
|
+
full_flags["cuda_flags"] = cuda_flags
|
|
1542
|
+
if full_flags:
|
|
1543
|
+
lines.append("")
|
|
1544
|
+
lines.append("<details>")
|
|
1545
|
+
lines.append("<summary>Environment flags (full)</summary>")
|
|
1546
|
+
lines.append("")
|
|
1547
|
+
lines.append("```yaml")
|
|
1548
|
+
flags_yaml = yaml.safe_dump(full_flags, sort_keys=True, width=80).strip()
|
|
1549
|
+
for line in flags_yaml.splitlines():
|
|
1550
|
+
lines.append(line)
|
|
1551
|
+
lines.append("```")
|
|
1552
|
+
lines.append("")
|
|
1553
|
+
lines.append("</details>")
|
|
870
1554
|
lines.append("")
|
|
871
1555
|
|
|
872
1556
|
# Edit Configuration (removed duplicate Edit Information section)
|
|
@@ -890,267 +1574,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
890
1574
|
pass
|
|
891
1575
|
lines.append("")
|
|
892
1576
|
|
|
893
|
-
|
|
894
|
-
if resolved_policy:
|
|
895
|
-
lines.append("## Resolved Policy")
|
|
896
|
-
lines.append("")
|
|
897
|
-
lines.append("```yaml")
|
|
898
|
-
resolved_yaml = yaml.safe_dump(
|
|
899
|
-
resolved_policy, sort_keys=True, width=80, default_flow_style=False
|
|
900
|
-
).strip()
|
|
901
|
-
for line in resolved_yaml.splitlines():
|
|
902
|
-
lines.append(line)
|
|
903
|
-
lines.append("```")
|
|
904
|
-
lines.append("")
|
|
905
|
-
|
|
906
|
-
policy_provenance = certificate.get("policy_provenance", {})
|
|
907
|
-
if policy_provenance:
|
|
908
|
-
lines.append("## Policy Provenance")
|
|
909
|
-
lines.append("")
|
|
910
|
-
lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
|
|
911
|
-
overrides_list = policy_provenance.get("overrides") or []
|
|
912
|
-
if overrides_list:
|
|
913
|
-
lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
|
|
914
|
-
else:
|
|
915
|
-
lines.append("- **Overrides:** (none)")
|
|
916
|
-
digest_value = policy_provenance.get("policy_digest")
|
|
917
|
-
if digest_value:
|
|
918
|
-
lines.append(f"- **Policy Digest:** `{digest_value}`")
|
|
919
|
-
else:
|
|
920
|
-
lines.append("- **Policy Digest:** (not recorded)")
|
|
921
|
-
if policy_provenance.get("resolved_at"):
|
|
922
|
-
lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
|
|
923
|
-
lines.append("")
|
|
924
|
-
|
|
925
|
-
# Dataset Information
|
|
926
|
-
lines.append("## Dataset Configuration")
|
|
927
|
-
lines.append("")
|
|
928
|
-
dataset = certificate.get("dataset", {}) or {}
|
|
929
|
-
prov = (
|
|
930
|
-
(dataset.get("provider") or "unknown")
|
|
931
|
-
if isinstance(dataset, dict)
|
|
932
|
-
else "unknown"
|
|
933
|
-
)
|
|
934
|
-
lines.append(f"- **Provider:** {prov}")
|
|
935
|
-
try:
|
|
936
|
-
seq_len_val = (
|
|
937
|
-
int(dataset.get("seq_len"))
|
|
938
|
-
if isinstance(dataset.get("seq_len"), int | float)
|
|
939
|
-
else dataset.get("seq_len")
|
|
940
|
-
)
|
|
941
|
-
except Exception: # pragma: no cover - defensive
|
|
942
|
-
seq_len_val = dataset.get("seq_len")
|
|
943
|
-
if seq_len_val is not None:
|
|
944
|
-
lines.append(f"- **Sequence Length:** {seq_len_val}")
|
|
945
|
-
windows_blk = (
|
|
946
|
-
dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
|
|
947
|
-
)
|
|
948
|
-
win_prev = windows_blk.get("preview")
|
|
949
|
-
win_final = windows_blk.get("final")
|
|
950
|
-
if win_prev is not None and win_final is not None:
|
|
951
|
-
lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
|
|
952
|
-
if windows_blk.get("seed") is not None:
|
|
953
|
-
lines.append(f"- **Seed:** {windows_blk.get('seed')}")
|
|
954
|
-
hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
|
|
955
|
-
if hash_blk.get("preview_tokens") is not None:
|
|
956
|
-
lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
|
|
957
|
-
if hash_blk.get("final_tokens") is not None:
|
|
958
|
-
lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
|
|
959
|
-
if hash_blk.get("total_tokens") is not None:
|
|
960
|
-
lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
|
|
961
|
-
if hash_blk.get("dataset"):
|
|
962
|
-
lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
|
|
963
|
-
tokenizer = dataset.get("tokenizer", {})
|
|
964
|
-
if tokenizer.get("name") or tokenizer.get("hash"):
|
|
965
|
-
vocab_size = tokenizer.get("vocab_size")
|
|
966
|
-
vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
|
|
967
|
-
lines.append(
|
|
968
|
-
f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
|
|
969
|
-
)
|
|
970
|
-
if tokenizer.get("hash"):
|
|
971
|
-
lines.append(f" - Hash: {tokenizer['hash']}")
|
|
972
|
-
lines.append(
|
|
973
|
-
f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
|
|
974
|
-
)
|
|
975
|
-
if tokenizer.get("pad_token") is not None:
|
|
976
|
-
lines.append(f" - PAD: {tokenizer.get('pad_token')}")
|
|
977
|
-
if tokenizer.get("add_prefix_space") is not None:
|
|
978
|
-
lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
|
|
979
|
-
lines.append("")
|
|
980
|
-
|
|
981
|
-
provenance_info = certificate.get("provenance", {}) or {}
|
|
982
|
-
if provenance_info:
|
|
983
|
-
lines.append("## Run Provenance")
|
|
984
|
-
lines.append("")
|
|
985
|
-
baseline_info = provenance_info.get("baseline", {}) or {}
|
|
986
|
-
if baseline_info:
|
|
987
|
-
lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
|
|
988
|
-
if baseline_info.get("report_hash"):
|
|
989
|
-
lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
|
|
990
|
-
if baseline_info.get("report_path"):
|
|
991
|
-
lines.append(f" - Report Path: {baseline_info.get('report_path')}")
|
|
992
|
-
edited_info = provenance_info.get("edited", {}) or {}
|
|
993
|
-
if edited_info:
|
|
994
|
-
lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
|
|
995
|
-
if edited_info.get("report_hash"):
|
|
996
|
-
lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
|
|
997
|
-
if edited_info.get("report_path"):
|
|
998
|
-
lines.append(f" - Report Path: {edited_info.get('report_path')}")
|
|
999
|
-
window_plan = provenance_info.get("window_plan")
|
|
1000
|
-
if isinstance(window_plan, dict) and window_plan:
|
|
1001
|
-
preview_val = window_plan.get(
|
|
1002
|
-
"preview_n", window_plan.get("actual_preview")
|
|
1003
|
-
)
|
|
1004
|
-
final_val = window_plan.get("final_n", window_plan.get("actual_final"))
|
|
1005
|
-
lines.append(
|
|
1006
|
-
f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
|
|
1007
|
-
)
|
|
1008
|
-
provider_digest = provenance_info.get("provider_digest")
|
|
1009
|
-
if isinstance(provider_digest, dict) and provider_digest:
|
|
1010
|
-
ids_d = provider_digest.get("ids_sha256")
|
|
1011
|
-
tok_d = provider_digest.get("tokenizer_sha256")
|
|
1012
|
-
mask_d = provider_digest.get("masking_sha256")
|
|
1013
|
-
|
|
1014
|
-
lines.append("- **Provider Digest:**")
|
|
1015
|
-
if tok_d:
|
|
1016
|
-
lines.append(
|
|
1017
|
-
f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
|
|
1018
|
-
)
|
|
1019
|
-
if ids_d:
|
|
1020
|
-
lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
|
|
1021
|
-
if mask_d:
|
|
1022
|
-
lines.append(
|
|
1023
|
-
f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
|
|
1024
|
-
)
|
|
1025
|
-
# Surface confidence label prominently
|
|
1026
|
-
try:
|
|
1027
|
-
conf = certificate.get("confidence", {}) or {}
|
|
1028
|
-
if isinstance(conf, dict) and conf.get("label"):
|
|
1029
|
-
lines.append(f"- **Confidence:** {conf.get('label')}")
|
|
1030
|
-
except Exception:
|
|
1031
|
-
pass
|
|
1032
|
-
# Surface policy version + thresholds hash (short)
|
|
1033
|
-
try:
|
|
1034
|
-
pd = certificate.get("policy_digest", {}) or {}
|
|
1035
|
-
if isinstance(pd, dict) and pd:
|
|
1036
|
-
pv = pd.get("policy_version")
|
|
1037
|
-
th = pd.get("thresholds_hash")
|
|
1038
|
-
if pv:
|
|
1039
|
-
lines.append(f"- **Policy Version:** {pv}")
|
|
1040
|
-
if isinstance(th, str) and th:
|
|
1041
|
-
short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
|
|
1042
|
-
lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
|
|
1043
|
-
if pd.get("changed"):
|
|
1044
|
-
lines.append("- Note: policy changed")
|
|
1045
|
-
except Exception:
|
|
1046
|
-
pass
|
|
1047
|
-
lines.append("")
|
|
1577
|
+
_append_dataset_and_provenance_section(lines, certificate)
|
|
1048
1578
|
|
|
1049
1579
|
# Structural Changes heading is printed with content later; avoid empty header here
|
|
1050
1580
|
|
|
1051
|
-
# Primary Metric (metric-v1) snapshot, if present
|
|
1052
|
-
try:
|
|
1053
|
-
pm = certificate.get("primary_metric")
|
|
1054
|
-
if isinstance(pm, dict) and pm:
|
|
1055
|
-
kind = pm.get("kind", "unknown")
|
|
1056
|
-
lines.append(f"## Primary Metric ({kind})")
|
|
1057
|
-
lines.append("")
|
|
1058
|
-
unit = pm.get("unit", "-")
|
|
1059
|
-
paired = pm.get("paired", False)
|
|
1060
|
-
reps = None
|
|
1061
|
-
# Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
|
|
1062
|
-
# Mark estimated metrics (e.g., pseudo accuracy counts) clearly
|
|
1063
|
-
estimated_flag = False
|
|
1064
|
-
try:
|
|
1065
|
-
if bool(pm.get("estimated")):
|
|
1066
|
-
estimated_flag = True
|
|
1067
|
-
elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
|
|
1068
|
-
estimated_flag = True
|
|
1069
|
-
except Exception:
|
|
1070
|
-
estimated_flag = False
|
|
1071
|
-
est_suffix = " (estimated)" if estimated_flag else ""
|
|
1072
|
-
lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
|
|
1073
|
-
gating_basis = pm.get("gating_basis") or pm.get("basis")
|
|
1074
|
-
if gating_basis:
|
|
1075
|
-
lines.append(f"- Basis: {gating_basis}")
|
|
1076
|
-
if isinstance(paired, bool):
|
|
1077
|
-
lines.append(f"- Paired: {paired}")
|
|
1078
|
-
reps = pm.get("reps")
|
|
1079
|
-
if isinstance(reps, int | float):
|
|
1080
|
-
lines.append(f"- Bootstrap Reps: {int(reps)}")
|
|
1081
|
-
ci = pm.get("ci") or pm.get("display_ci")
|
|
1082
|
-
if (
|
|
1083
|
-
isinstance(ci, list | tuple)
|
|
1084
|
-
and len(ci) == 2
|
|
1085
|
-
and all(isinstance(x, int | float) for x in ci)
|
|
1086
|
-
):
|
|
1087
|
-
lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
|
|
1088
|
-
prev = pm.get("preview")
|
|
1089
|
-
fin = pm.get("final")
|
|
1090
|
-
ratio = pm.get("ratio_vs_baseline")
|
|
1091
|
-
|
|
1092
|
-
lines.append("")
|
|
1093
|
-
if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
|
|
1094
|
-
lines.append(
|
|
1095
|
-
"- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
|
|
1096
|
-
)
|
|
1097
|
-
lines.append("| Field | Value |")
|
|
1098
|
-
lines.append("|-------|-------|")
|
|
1099
|
-
lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
|
|
1100
|
-
lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
|
|
1101
|
-
# For accuracy, ratio field is actually a delta (as per helper); clarify inline
|
|
1102
|
-
if kind in {"accuracy", "vqa_accuracy"}:
|
|
1103
|
-
lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
|
|
1104
|
-
# When baseline accuracy is near-zero, clarify display rule
|
|
1105
|
-
try:
|
|
1106
|
-
base_pt = pm.get("baseline_point")
|
|
1107
|
-
if isinstance(base_pt, int | float) and base_pt < 0.05:
|
|
1108
|
-
lines.append(
|
|
1109
|
-
"- Note: baseline < 5%; ratio suppressed; showing Δpp"
|
|
1110
|
-
)
|
|
1111
|
-
except Exception:
|
|
1112
|
-
pass
|
|
1113
|
-
else:
|
|
1114
|
-
try:
|
|
1115
|
-
lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
|
|
1116
|
-
except Exception:
|
|
1117
|
-
lines.append("| Ratio vs Baseline | N/A |")
|
|
1118
|
-
lines.append("")
|
|
1119
|
-
# Secondary metrics (informational)
|
|
1120
|
-
try:
|
|
1121
|
-
secs = certificate.get("secondary_metrics")
|
|
1122
|
-
if isinstance(secs, list) and secs:
|
|
1123
|
-
lines.append("## Secondary Metrics (informational)")
|
|
1124
|
-
lines.append("")
|
|
1125
|
-
lines.append("| Kind | Preview | Final | vs Baseline | CI |")
|
|
1126
|
-
lines.append("|------|---------|-------|-------------|----|")
|
|
1127
|
-
for m in secs:
|
|
1128
|
-
if not isinstance(m, dict):
|
|
1129
|
-
continue
|
|
1130
|
-
k = m.get("kind", "?")
|
|
1131
|
-
pv = _fmt_by_kind(m.get("preview"), str(k))
|
|
1132
|
-
fv = _fmt_by_kind(m.get("final"), str(k))
|
|
1133
|
-
rb = m.get("ratio_vs_baseline")
|
|
1134
|
-
try:
|
|
1135
|
-
rb_str = (
|
|
1136
|
-
f"{float(rb):.3f}"
|
|
1137
|
-
if (str(k).startswith("ppl"))
|
|
1138
|
-
else _fmt_by_kind(rb, str(k))
|
|
1139
|
-
)
|
|
1140
|
-
except Exception:
|
|
1141
|
-
rb_str = "N/A"
|
|
1142
|
-
ci = m.get("display_ci") or m.get("ci")
|
|
1143
|
-
if isinstance(ci, tuple | list) and len(ci) == 2:
|
|
1144
|
-
ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
|
|
1145
|
-
else:
|
|
1146
|
-
ci_str = "–"
|
|
1147
|
-
lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
|
|
1148
|
-
lines.append("")
|
|
1149
|
-
except Exception:
|
|
1150
|
-
pass
|
|
1151
|
-
except Exception:
|
|
1152
|
-
pass
|
|
1153
|
-
|
|
1154
1581
|
# System Overhead section (latency/throughput)
|
|
1155
1582
|
sys_over = certificate.get("system_overhead", {}) or {}
|
|
1156
1583
|
if isinstance(sys_over, dict) and sys_over:
|
|
@@ -1309,31 +1736,32 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
1309
1736
|
|
|
1310
1737
|
# Variance Guard (Spectral/RMT summaries are already provided above)
|
|
1311
1738
|
variance = certificate["variance"]
|
|
1312
|
-
|
|
1739
|
+
appendix_lines.append("### Variance Guard")
|
|
1740
|
+
appendix_lines.append("")
|
|
1313
1741
|
|
|
1314
1742
|
# Display whether VE was enabled after A/B test
|
|
1315
|
-
|
|
1743
|
+
appendix_lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
|
|
1316
1744
|
|
|
1317
1745
|
if variance["enabled"]:
|
|
1318
1746
|
# VE was enabled - show the gain
|
|
1319
1747
|
gain_value = variance.get("gain", "N/A")
|
|
1320
1748
|
if isinstance(gain_value, int | float):
|
|
1321
|
-
|
|
1749
|
+
appendix_lines.append(f"- **Gain:** {gain_value:.3f}")
|
|
1322
1750
|
else:
|
|
1323
|
-
|
|
1751
|
+
appendix_lines.append(f"- **Gain:** {gain_value}")
|
|
1324
1752
|
else:
|
|
1325
1753
|
# VE was not enabled - show succinct reason if available, else a clear disabled message
|
|
1326
1754
|
ppl_no_ve = variance.get("ppl_no_ve")
|
|
1327
1755
|
ppl_with_ve = variance.get("ppl_with_ve")
|
|
1328
1756
|
ratio_ci = variance.get("ratio_ci")
|
|
1329
1757
|
if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
|
|
1330
|
-
|
|
1331
|
-
|
|
1758
|
+
appendix_lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
|
|
1759
|
+
appendix_lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
|
|
1332
1760
|
gain_value = variance.get("gain")
|
|
1333
1761
|
if isinstance(gain_value, int | float):
|
|
1334
|
-
|
|
1762
|
+
appendix_lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
|
|
1335
1763
|
else:
|
|
1336
|
-
|
|
1764
|
+
appendix_lines.append(
|
|
1337
1765
|
"- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
|
|
1338
1766
|
)
|
|
1339
1767
|
# Add concise rationale aligned with Balanced predictive gate contract
|
|
@@ -1341,14 +1769,14 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
1341
1769
|
ve_policy = certificate.get("policies", {}).get("variance", {})
|
|
1342
1770
|
min_effect = ve_policy.get("min_effect_lognll")
|
|
1343
1771
|
if isinstance(min_effect, int | float):
|
|
1344
|
-
|
|
1772
|
+
appendix_lines.append(
|
|
1345
1773
|
f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
|
|
1346
1774
|
)
|
|
1347
1775
|
else:
|
|
1348
|
-
|
|
1776
|
+
appendix_lines.append(
|
|
1349
1777
|
"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
|
|
1350
1778
|
)
|
|
1351
|
-
|
|
1779
|
+
appendix_lines.append(
|
|
1352
1780
|
"- Predictive Gate: evaluated=false (disabled under current policy/edit)."
|
|
1353
1781
|
)
|
|
1354
1782
|
except Exception:
|
|
@@ -1356,14 +1784,17 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
1356
1784
|
|
|
1357
1785
|
if variance.get("ratio_ci"):
|
|
1358
1786
|
ratio_lo, ratio_hi = variance["ratio_ci"]
|
|
1359
|
-
|
|
1787
|
+
appendix_lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
|
|
1360
1788
|
|
|
1361
1789
|
if variance.get("calibration") and variance.get("enabled"):
|
|
1362
1790
|
calib = variance["calibration"]
|
|
1363
1791
|
coverage = calib.get("coverage")
|
|
1364
1792
|
requested = calib.get("requested")
|
|
1365
1793
|
status = calib.get("status", "unknown")
|
|
1366
|
-
|
|
1794
|
+
appendix_lines.append(
|
|
1795
|
+
f"- **Calibration:** {coverage}/{requested} windows ({status})"
|
|
1796
|
+
)
|
|
1797
|
+
appendix_lines.append("")
|
|
1367
1798
|
|
|
1368
1799
|
lines.append("")
|
|
1369
1800
|
|
|
@@ -1397,32 +1828,22 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
1397
1828
|
lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
|
|
1398
1829
|
lines.append("")
|
|
1399
1830
|
|
|
1400
|
-
|
|
1401
|
-
lines.append("## Applied Policies")
|
|
1402
|
-
lines.append("")
|
|
1403
|
-
policies = certificate["policies"]
|
|
1404
|
-
for guard_name, policy in policies.items():
|
|
1405
|
-
lines.append(f"### {guard_name.title()}")
|
|
1406
|
-
lines.append("")
|
|
1407
|
-
policy_yaml = (
|
|
1408
|
-
yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
|
|
1409
|
-
)
|
|
1410
|
-
lines.append("```yaml")
|
|
1411
|
-
for line in policy_yaml:
|
|
1412
|
-
lines.append(line)
|
|
1413
|
-
lines.append("```")
|
|
1414
|
-
lines.append("")
|
|
1831
|
+
_append_policy_configuration_section(lines, certificate)
|
|
1415
1832
|
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
lines.append("")
|
|
1833
|
+
appendix_lines.append("### Artifacts")
|
|
1834
|
+
appendix_lines.append("")
|
|
1419
1835
|
artifacts = certificate["artifacts"]
|
|
1420
1836
|
if artifacts.get("events_path"):
|
|
1421
|
-
|
|
1837
|
+
appendix_lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
|
|
1422
1838
|
if artifacts.get("report_path"):
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1839
|
+
appendix_lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
|
|
1840
|
+
appendix_lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
|
|
1841
|
+
appendix_lines.append("")
|
|
1842
|
+
|
|
1843
|
+
if appendix_lines:
|
|
1844
|
+
lines.append("## Appendix")
|
|
1845
|
+
lines.append("")
|
|
1846
|
+
lines.extend(appendix_lines)
|
|
1426
1847
|
|
|
1427
1848
|
# Certificate Hash for Integrity
|
|
1428
1849
|
cert_hash = _compute_certificate_hash(certificate)
|
|
@@ -1433,7 +1854,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
|
1433
1854
|
lines.append("---")
|
|
1434
1855
|
lines.append("")
|
|
1435
1856
|
lines.append(
|
|
1436
|
-
"*This InvarLock
|
|
1857
|
+
"*This InvarLock evaluation certificate provides a comprehensive assessment of model compression safety.*"
|
|
1437
1858
|
)
|
|
1438
1859
|
lines.append(
|
|
1439
1860
|
"*All metrics are compared against the uncompressed baseline model for safety validation.*"
|