invarlock 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +33 -0
- invarlock/__main__.py +10 -0
- invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
- invarlock/_data/runtime/profiles/release.yaml +23 -0
- invarlock/_data/runtime/tiers.yaml +76 -0
- invarlock/adapters/__init__.py +102 -0
- invarlock/adapters/_capabilities.py +45 -0
- invarlock/adapters/auto.py +99 -0
- invarlock/adapters/base.py +530 -0
- invarlock/adapters/base_types.py +85 -0
- invarlock/adapters/hf_bert.py +852 -0
- invarlock/adapters/hf_gpt2.py +403 -0
- invarlock/adapters/hf_llama.py +485 -0
- invarlock/adapters/hf_mixin.py +383 -0
- invarlock/adapters/hf_onnx.py +112 -0
- invarlock/adapters/hf_t5.py +137 -0
- invarlock/adapters/py.typed +1 -0
- invarlock/assurance/__init__.py +43 -0
- invarlock/cli/__init__.py +8 -0
- invarlock/cli/__main__.py +8 -0
- invarlock/cli/_evidence.py +25 -0
- invarlock/cli/_json.py +75 -0
- invarlock/cli/adapter_auto.py +162 -0
- invarlock/cli/app.py +287 -0
- invarlock/cli/commands/__init__.py +26 -0
- invarlock/cli/commands/certify.py +403 -0
- invarlock/cli/commands/doctor.py +1358 -0
- invarlock/cli/commands/explain_gates.py +151 -0
- invarlock/cli/commands/export_html.py +100 -0
- invarlock/cli/commands/plugins.py +1331 -0
- invarlock/cli/commands/report.py +354 -0
- invarlock/cli/commands/run.py +4146 -0
- invarlock/cli/commands/verify.py +1040 -0
- invarlock/cli/config.py +396 -0
- invarlock/cli/constants.py +68 -0
- invarlock/cli/device.py +92 -0
- invarlock/cli/doctor_helpers.py +74 -0
- invarlock/cli/errors.py +6 -0
- invarlock/cli/overhead_utils.py +60 -0
- invarlock/cli/provenance.py +66 -0
- invarlock/cli/utils.py +41 -0
- invarlock/config.py +56 -0
- invarlock/core/__init__.py +62 -0
- invarlock/core/abi.py +15 -0
- invarlock/core/api.py +274 -0
- invarlock/core/auto_tuning.py +317 -0
- invarlock/core/bootstrap.py +226 -0
- invarlock/core/checkpoint.py +221 -0
- invarlock/core/contracts.py +73 -0
- invarlock/core/error_utils.py +64 -0
- invarlock/core/events.py +298 -0
- invarlock/core/exceptions.py +95 -0
- invarlock/core/registry.py +481 -0
- invarlock/core/retry.py +146 -0
- invarlock/core/runner.py +2041 -0
- invarlock/core/types.py +154 -0
- invarlock/edits/__init__.py +12 -0
- invarlock/edits/_edit_utils.py +249 -0
- invarlock/edits/_external_utils.py +268 -0
- invarlock/edits/noop.py +47 -0
- invarlock/edits/py.typed +1 -0
- invarlock/edits/quant_rtn.py +801 -0
- invarlock/edits/registry.py +166 -0
- invarlock/eval/__init__.py +23 -0
- invarlock/eval/bench.py +1207 -0
- invarlock/eval/bootstrap.py +50 -0
- invarlock/eval/data.py +2052 -0
- invarlock/eval/metrics.py +2167 -0
- invarlock/eval/primary_metric.py +767 -0
- invarlock/eval/probes/__init__.py +24 -0
- invarlock/eval/probes/fft.py +139 -0
- invarlock/eval/probes/mi.py +213 -0
- invarlock/eval/probes/post_attention.py +323 -0
- invarlock/eval/providers/base.py +67 -0
- invarlock/eval/providers/seq2seq.py +111 -0
- invarlock/eval/providers/text_lm.py +113 -0
- invarlock/eval/providers/vision_text.py +93 -0
- invarlock/eval/py.typed +1 -0
- invarlock/guards/__init__.py +18 -0
- invarlock/guards/_contracts.py +9 -0
- invarlock/guards/invariants.py +640 -0
- invarlock/guards/policies.py +805 -0
- invarlock/guards/py.typed +1 -0
- invarlock/guards/rmt.py +2097 -0
- invarlock/guards/spectral.py +1419 -0
- invarlock/guards/tier_config.py +354 -0
- invarlock/guards/variance.py +3298 -0
- invarlock/guards_ref/__init__.py +15 -0
- invarlock/guards_ref/rmt_ref.py +40 -0
- invarlock/guards_ref/spectral_ref.py +135 -0
- invarlock/guards_ref/variance_ref.py +60 -0
- invarlock/model_profile.py +353 -0
- invarlock/model_utils.py +221 -0
- invarlock/observability/__init__.py +10 -0
- invarlock/observability/alerting.py +535 -0
- invarlock/observability/core.py +546 -0
- invarlock/observability/exporters.py +565 -0
- invarlock/observability/health.py +588 -0
- invarlock/observability/metrics.py +457 -0
- invarlock/observability/py.typed +1 -0
- invarlock/observability/utils.py +553 -0
- invarlock/plugins/__init__.py +12 -0
- invarlock/plugins/hello_guard.py +33 -0
- invarlock/plugins/hf_awq_adapter.py +82 -0
- invarlock/plugins/hf_bnb_adapter.py +79 -0
- invarlock/plugins/hf_gptq_adapter.py +78 -0
- invarlock/plugins/py.typed +1 -0
- invarlock/py.typed +1 -0
- invarlock/reporting/__init__.py +7 -0
- invarlock/reporting/certificate.py +3221 -0
- invarlock/reporting/certificate_schema.py +244 -0
- invarlock/reporting/dataset_hashing.py +215 -0
- invarlock/reporting/guards_analysis.py +948 -0
- invarlock/reporting/html.py +32 -0
- invarlock/reporting/normalizer.py +235 -0
- invarlock/reporting/policy_utils.py +517 -0
- invarlock/reporting/primary_metric_utils.py +265 -0
- invarlock/reporting/render.py +1442 -0
- invarlock/reporting/report.py +903 -0
- invarlock/reporting/report_types.py +278 -0
- invarlock/reporting/utils.py +175 -0
- invarlock/reporting/validate.py +631 -0
- invarlock/security.py +176 -0
- invarlock/sparsity_utils.py +323 -0
- invarlock/utils/__init__.py +150 -0
- invarlock/utils/digest.py +45 -0
- invarlock-0.2.0.dist-info/METADATA +586 -0
- invarlock-0.2.0.dist-info/RECORD +132 -0
- invarlock-0.2.0.dist-info/WHEEL +5 -0
- invarlock-0.2.0.dist-info/entry_points.txt +20 -0
- invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
- invarlock-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1442 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
# mypy: ignore-errors
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
# Import certificate module for helper access without creating hard cycles
|
|
13
|
+
from . import certificate as C
|
|
14
|
+
|
|
15
|
+
# Console Validation Block helpers (allow-list driven)
|
|
16
|
+
_CONSOLE_LABELS_DEFAULT = [
|
|
17
|
+
"Primary Metric Acceptable",
|
|
18
|
+
"Preview Final Drift Acceptable",
|
|
19
|
+
"Guard Overhead Acceptable",
|
|
20
|
+
"Invariants Pass",
|
|
21
|
+
"Spectral Stable",
|
|
22
|
+
"Rmt Stable",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _load_console_labels() -> list[str]:
|
|
27
|
+
"""Load console labels allow-list from contracts with a safe fallback."""
|
|
28
|
+
try:
|
|
29
|
+
root = Path(__file__).resolve().parents[3]
|
|
30
|
+
path = root / "contracts" / "console_labels.json"
|
|
31
|
+
if path.exists():
|
|
32
|
+
data = json.loads(path.read_text(encoding="utf-8"))
|
|
33
|
+
if isinstance(data, list) and all(isinstance(x, str) for x in data):
|
|
34
|
+
return list(data)
|
|
35
|
+
except Exception:
|
|
36
|
+
pass
|
|
37
|
+
return list(_CONSOLE_LABELS_DEFAULT)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, Any]:
    """Produce a normalized console validation block from a certificate.

    Returns a dict with keys:
    - labels: the canonical label list
    - rows: list of {label, status, evaluated, ok}
    - overall_pass: boolean computed from canonical rows only. Guard Overhead is
      counted only when evaluated.
    """
    validation = certificate.get("validation", {}) or {}
    guard_ctx = certificate.get("guard_overhead", {}) or {}
    guard_evaluated = isinstance(guard_ctx, dict) and bool(guard_ctx.get("evaluated"))

    rows: list[dict[str, Any]] = []
    ok_map: dict[str, bool] = {}
    effective_labels: list[str] = []

    for label in _load_console_labels():
        # Map label → validation key, e.g. "Invariants Pass" → "invariants_pass".
        key = label.strip().lower().replace(" ", "_")
        if key == "guard_overhead_acceptable" and not guard_evaluated:
            # Omit the row entirely when guard overhead was not evaluated
            # (policy/profile skipped it).
            continue
        ok = bool(validation.get(key, False))
        rows.append(
            {
                "label": label,
                "status": "✅ PASS" if ok else "❌ FAIL",
                # Any row that survives the guard check above was evaluated.
                "evaluated": True,
                "ok": ok,
            }
        )
        effective_labels.append(label)
        ok_map[key] = ok

    # Overall policy from canonical rows only; exclude guard when not evaluated.
    canonical_keys = [
        "primary_metric_acceptable",
        "preview_final_drift_acceptable",
        "invariants_pass",
        "spectral_stable",
        "rmt_stable",
    ]
    if guard_evaluated:
        canonical_keys.append("guard_overhead_acceptable")

    return {
        "labels": effective_labels,
        "rows": rows,
        "overall_pass": all(ok_map.get(k, False) for k in canonical_keys),
    }
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _format_plugin(plugin: dict[str, Any]) -> str:
|
|
100
|
+
"""Format a plugin entry for markdown list rendering."""
|
|
101
|
+
name = plugin.get("name", "unknown")
|
|
102
|
+
version = plugin.get("version") or "-"
|
|
103
|
+
module = plugin.get("module") or "unknown"
|
|
104
|
+
entry = plugin.get("entry_point")
|
|
105
|
+
pieces = [f"**{name}** v{version}", f"`{module}`"]
|
|
106
|
+
if entry:
|
|
107
|
+
pieces.append(f"[{entry}]")
|
|
108
|
+
return " ".join(pieces)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _short_digest(v: str) -> str:
|
|
112
|
+
v = str(v)
|
|
113
|
+
return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _fmt_by_kind(x: Any, k: str) -> str:
|
|
117
|
+
try:
|
|
118
|
+
xv = float(x)
|
|
119
|
+
except Exception:
|
|
120
|
+
return "N/A"
|
|
121
|
+
k = str(k).lower()
|
|
122
|
+
if k in {"accuracy", "vqa_accuracy"}:
|
|
123
|
+
return f"{xv * 100.0:.1f}"
|
|
124
|
+
if k.startswith("ppl"):
|
|
125
|
+
return f"{xv:.3g}"
|
|
126
|
+
return f"{xv:.3f}"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _fmtv(key: str, v: Any) -> str:
|
|
130
|
+
if not (isinstance(v, int | float) and math.isfinite(float(v))):
|
|
131
|
+
return "-"
|
|
132
|
+
if key.startswith("latency_ms_"):
|
|
133
|
+
return f"{float(v):.0f}"
|
|
134
|
+
if key.startswith("throughput_"):
|
|
135
|
+
return f"{float(v):.1f}"
|
|
136
|
+
return f"{float(v):.3f}"
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _p(x: Any) -> str:
|
|
140
|
+
try:
|
|
141
|
+
return f"{float(x) * 100.0:.1f}%"
|
|
142
|
+
except Exception:
|
|
143
|
+
return "N/A"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _append_system_overhead_section(lines: list[str], sys_over: dict[str, Any]) -> None:
    """Append the System Overhead markdown section to lines given a payload."""
    if not (isinstance(sys_over, dict) and sys_over):
        return
    lines.extend(
        [
            "## System Overhead",
            "",
            "| Metric | Baseline | Edited | Δ | Ratio |",
            "|--------|----------|--------|---|-------|",
        ]
    )

    def _as_float(raw: Any) -> float:
        # Coerce to float; anything unparseable becomes NaN.
        try:
            return float(raw)
        except Exception:
            return float("nan")

    row_labels = {
        "latency_ms_p50": "Latency p50 (ms)",
        "latency_ms_p95": "Latency p95 (ms)",
        "throughput_sps": "Throughput (samples/s)",
    }
    for metric_key, metric_label in row_labels.items():
        entry = sys_over.get(metric_key)
        if not isinstance(entry, dict):
            continue
        baseline = _as_float(entry.get("baseline"))
        edited = _as_float(entry.get("edited"))
        baseline_missing = not math.isfinite(baseline) or baseline == 0.0
        edited_missing = not math.isfinite(edited) or edited == 0.0
        if baseline_missing and edited_missing:
            # When both sides are missing or zero, present N/A to avoid
            # implying measured zeros.
            b_str = e_str = d_str = r_str = "N/A"
        else:
            b_str = _fmtv(metric_key, baseline)
            e_str = _fmtv(metric_key, edited)
            delta = entry.get("delta")
            ratio = entry.get("ratio")
            d_str = _fmtv(metric_key, delta) if isinstance(delta, int | float) else "-"
            r_str = _fmtv(metric_key, ratio) if isinstance(ratio, int | float) else "-"
        lines.append(f"| {metric_label} | {b_str} | {e_str} | {d_str} | {r_str} |")
    lines.append("")
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _append_accuracy_subgroups(lines: list[str], subgroups: dict[str, Any]) -> None:
    """Append the Accuracy Subgroups markdown table given a subgroups payload."""
    if not (isinstance(subgroups, dict) and subgroups):
        return
    lines.extend(
        [
            "## Accuracy Subgroups (informational)",
            "",
            "| Group | n(prev) | n(final) | Acc(prev) | Acc(final) | Δpp |",
            "|-------|---------|----------|-----------|------------|-----|",
        ]
    )

    def _count(record: Any, field: str) -> int:
        # Defensive int coercion; malformed values count as zero.
        try:
            return int(record.get(field, 0))
        except Exception:
            return 0

    for group, record in subgroups.items():
        n_preview = _count(record, "n_preview")
        n_final = _count(record, "n_final")
        delta_pp = record.get("delta_pp")
        try:
            delta_str = f"{float(delta_pp):+.1f} pp"
        except Exception:
            delta_str = "N/A"
        lines.append(
            f"| {group} | {n_preview} | {n_final} | {_p(record.get('preview'))} | {_p(record.get('final'))} | {delta_str} |"
        )
    lines.append("")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _compute_certificate_hash(certificate: dict[str, Any]) -> str:
|
|
219
|
+
"""Compute integrity hash for the certificate.
|
|
220
|
+
|
|
221
|
+
Hash ignores the `artifacts` section for stability across saves.
|
|
222
|
+
"""
|
|
223
|
+
# Create a copy without the artifacts section for stable hashing
|
|
224
|
+
cert_copy = dict(certificate or {})
|
|
225
|
+
cert_copy.pop("artifacts", None)
|
|
226
|
+
|
|
227
|
+
# Sort keys for deterministic hashing
|
|
228
|
+
cert_str = json.dumps(cert_copy, sort_keys=True)
|
|
229
|
+
import hashlib as _hash
|
|
230
|
+
|
|
231
|
+
return _hash.sha256(cert_str.encode()).hexdigest()[:16]
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
    """Build a small, reusable console summary pack from a certificate.

    Returns a dict with:
    - overall_pass: bool
    - overall_line: human-friendly overall status line
    - gate_lines: list of "<Label>: <Status>" strings for each evaluated gate
    - labels: the canonical label list used
    """
    block = compute_console_validation_block(certificate)
    overall_pass = bool(block.get("overall_pass"))
    marker = "✅" if overall_pass else "❌"
    verdict = "PASS" if overall_pass else "FAIL"

    gate_lines = [
        f"{row.get('label', 'Gate')}: {row.get('status', '')}"
        for row in (block.get("rows", []) or [])
        if isinstance(row, dict)
    ]

    return {
        "overall_pass": overall_pass,
        "overall_line": f"Overall Status: {marker} {verdict}",
        "gate_lines": gate_lines,
        "labels": block.get("labels", []),
    }
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def render_certificate_markdown(certificate: dict[str, Any]) -> str:
|
|
265
|
+
"""
|
|
266
|
+
Render a certificate as a formatted Markdown report with pretty tables.
|
|
267
|
+
|
|
268
|
+
This implementation is moved from certificate.py to keep that module lean.
|
|
269
|
+
To avoid circular import issues, we alias helpers from the certificate
|
|
270
|
+
module inside the function body.
|
|
271
|
+
"""
|
|
272
|
+
# Alias frequently used helpers locally to avoid editing the large body
|
|
273
|
+
validate_certificate = C.validate_certificate
|
|
274
|
+
|
|
275
|
+
if not validate_certificate(certificate):
|
|
276
|
+
raise ValueError("Invalid certificate structure")
|
|
277
|
+
|
|
278
|
+
lines = []
|
|
279
|
+
edit_name = str(certificate.get("edit_name") or "").lower()
|
|
280
|
+
|
|
281
|
+
# Header
|
|
282
|
+
lines.append("# InvarLock Safety Certificate")
|
|
283
|
+
lines.append("")
|
|
284
|
+
lines.append(
|
|
285
|
+
"> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
|
|
286
|
+
"upper bound; “point & upper” requires both to pass.*"
|
|
287
|
+
)
|
|
288
|
+
lines.append("")
|
|
289
|
+
lines.append(f"**Schema Version:** {certificate['schema_version']}")
|
|
290
|
+
lines.append(f"**Run ID:** `{certificate['run_id']}`")
|
|
291
|
+
lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
|
|
292
|
+
lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
|
|
293
|
+
lines.append("")
|
|
294
|
+
|
|
295
|
+
plugins = certificate.get("plugins", {})
|
|
296
|
+
if isinstance(plugins, dict) and plugins:
|
|
297
|
+
lines.append("## Plugin Provenance")
|
|
298
|
+
lines.append("")
|
|
299
|
+
|
|
300
|
+
adapter_plugin = plugins.get("adapter")
|
|
301
|
+
if isinstance(adapter_plugin, dict):
|
|
302
|
+
lines.append(f"- Adapter: {_format_plugin(adapter_plugin)}")
|
|
303
|
+
|
|
304
|
+
edit_plugin = plugins.get("edit")
|
|
305
|
+
if isinstance(edit_plugin, dict):
|
|
306
|
+
lines.append(f"- Edit: {_format_plugin(edit_plugin)}")
|
|
307
|
+
|
|
308
|
+
guard_plugins = plugins.get("guards")
|
|
309
|
+
if isinstance(guard_plugins, list) and guard_plugins:
|
|
310
|
+
guard_entries = [
|
|
311
|
+
_format_plugin(plugin)
|
|
312
|
+
for plugin in guard_plugins
|
|
313
|
+
if isinstance(plugin, dict)
|
|
314
|
+
]
|
|
315
|
+
if guard_entries:
|
|
316
|
+
lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
|
|
317
|
+
lines.append("")
|
|
318
|
+
|
|
319
|
+
# Executive Summary with validation status (canonical, from console block)
|
|
320
|
+
lines.append("## Executive Summary")
|
|
321
|
+
lines.append("")
|
|
322
|
+
_block = compute_console_validation_block(certificate)
|
|
323
|
+
overall_pass = bool(_block.get("overall_pass"))
|
|
324
|
+
status_emoji = "✅" if overall_pass else "❌"
|
|
325
|
+
lines.append(
|
|
326
|
+
f"**Overall Status:** {status_emoji} {'PASS' if overall_pass else 'FAIL'}"
|
|
327
|
+
)
|
|
328
|
+
# Window Plan one-liner for quick audit
|
|
329
|
+
try:
|
|
330
|
+
plan_ctx = (
|
|
331
|
+
certificate.get("window_plan")
|
|
332
|
+
or certificate.get("dataset", {}).get("windows", {})
|
|
333
|
+
or certificate.get("ppl", {}).get("window_plan")
|
|
334
|
+
)
|
|
335
|
+
seq_len = certificate.get("dataset", {}).get("seq_len") or certificate.get(
|
|
336
|
+
"dataset", {}
|
|
337
|
+
).get("sequence_length")
|
|
338
|
+
if isinstance(plan_ctx, dict):
|
|
339
|
+
profile = plan_ctx.get("profile")
|
|
340
|
+
preview_n = (
|
|
341
|
+
plan_ctx.get("preview_n")
|
|
342
|
+
if plan_ctx.get("preview_n") is not None
|
|
343
|
+
else plan_ctx.get("actual_preview")
|
|
344
|
+
)
|
|
345
|
+
final_n = (
|
|
346
|
+
plan_ctx.get("final_n")
|
|
347
|
+
if plan_ctx.get("final_n") is not None
|
|
348
|
+
else plan_ctx.get("actual_final")
|
|
349
|
+
)
|
|
350
|
+
lines.append(
|
|
351
|
+
f"- Window Plan: {profile}, {preview_n}/{final_n}{', seq_len=' + str(seq_len) if seq_len else ''}"
|
|
352
|
+
)
|
|
353
|
+
except Exception:
|
|
354
|
+
pass
|
|
355
|
+
lines.append("")
|
|
356
|
+
|
|
357
|
+
# Validation table with canonical gates (mirrors console allow-list)
|
|
358
|
+
lines.append("## Quality Gates")
|
|
359
|
+
lines.append("")
|
|
360
|
+
lines.append("| Gate | Status | Measured | Threshold | Basis | Description |")
|
|
361
|
+
lines.append("|------|--------|----------|-----------|-------|-------------|")
|
|
362
|
+
|
|
363
|
+
pm_block = certificate.get("primary_metric", {}) or {}
|
|
364
|
+
has_pm = isinstance(pm_block, dict) and bool(pm_block)
|
|
365
|
+
auto_info = certificate.get("auto", {})
|
|
366
|
+
tier = (auto_info.get("tier") or "balanced").lower()
|
|
367
|
+
|
|
368
|
+
# Helper to emit Primary Metric Acceptable row
|
|
369
|
+
def _emit_pm_gate_row() -> None:
|
|
370
|
+
pm_kind = str(pm_block.get("kind", "")).lower()
|
|
371
|
+
value = pm_block.get("ratio_vs_baseline")
|
|
372
|
+
gating_basis = pm_block.get("gating_basis") or "point"
|
|
373
|
+
ok = bool(
|
|
374
|
+
certificate.get("validation", {}).get("primary_metric_acceptable", True)
|
|
375
|
+
)
|
|
376
|
+
status = "✅ PASS" if ok else "❌ FAIL"
|
|
377
|
+
if pm_kind in {"accuracy", "vqa_accuracy"}:
|
|
378
|
+
measured = f"{value:+.2f} pp" if isinstance(value, int | float) else "N/A"
|
|
379
|
+
th_map = {
|
|
380
|
+
"conservative": -0.5,
|
|
381
|
+
"balanced": -1.0,
|
|
382
|
+
"aggressive": -2.0,
|
|
383
|
+
"none": -1.0,
|
|
384
|
+
}
|
|
385
|
+
th = th_map.get(tier, -1.0)
|
|
386
|
+
lines.append(
|
|
387
|
+
f"| Primary Metric Acceptable | {status} | {measured} | ≥ {th:+.2f} pp | {gating_basis} | Δ accuracy vs baseline |"
|
|
388
|
+
)
|
|
389
|
+
else:
|
|
390
|
+
tier_thresholds = {
|
|
391
|
+
"conservative": 1.05,
|
|
392
|
+
"balanced": 1.10,
|
|
393
|
+
"aggressive": 1.20,
|
|
394
|
+
"none": 1.10,
|
|
395
|
+
}
|
|
396
|
+
ratio_limit = tier_thresholds.get(tier, 1.10)
|
|
397
|
+
target_ratio = auto_info.get("target_pm_ratio")
|
|
398
|
+
if isinstance(target_ratio, int | float) and target_ratio > 0:
|
|
399
|
+
ratio_limit = min(ratio_limit, float(target_ratio))
|
|
400
|
+
measured = f"{value:.3f}x" if isinstance(value, int | float) else "N/A"
|
|
401
|
+
lines.append(
|
|
402
|
+
f"| Primary Metric Acceptable | {status} | {measured} | ≤ {ratio_limit:.2f}x | {gating_basis} | Ratio vs baseline |"
|
|
403
|
+
)
|
|
404
|
+
|
|
405
|
+
# Helper to emit Preview Final Drift Acceptable row
|
|
406
|
+
def _emit_drift_gate_row() -> None:
|
|
407
|
+
ok = bool(
|
|
408
|
+
certificate.get("validation", {}).get(
|
|
409
|
+
"preview_final_drift_acceptable", True
|
|
410
|
+
)
|
|
411
|
+
)
|
|
412
|
+
status = "✅ PASS" if ok else "❌ FAIL"
|
|
413
|
+
# Compute drift from PM preview/final when available
|
|
414
|
+
try:
|
|
415
|
+
pv = (
|
|
416
|
+
float(pm_block.get("preview"))
|
|
417
|
+
if isinstance(pm_block.get("preview"), int | float)
|
|
418
|
+
else float("nan")
|
|
419
|
+
)
|
|
420
|
+
fv = (
|
|
421
|
+
float(pm_block.get("final"))
|
|
422
|
+
if isinstance(pm_block.get("final"), int | float)
|
|
423
|
+
else float("nan")
|
|
424
|
+
)
|
|
425
|
+
drift = (
|
|
426
|
+
fv / pv
|
|
427
|
+
if (math.isfinite(pv) and pv > 0 and math.isfinite(fv))
|
|
428
|
+
else float("nan")
|
|
429
|
+
)
|
|
430
|
+
except Exception:
|
|
431
|
+
drift = float("nan")
|
|
432
|
+
measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
|
|
433
|
+
lines.append(
|
|
434
|
+
f"| Preview Final Drift Acceptable | {status} | {measured} | 0.95–1.05x | point | Final/Preview ratio stability |"
|
|
435
|
+
)
|
|
436
|
+
|
|
437
|
+
# Helper to emit Guard Overhead Acceptable row (only when evaluated)
|
|
438
|
+
def _emit_overhead_gate_row() -> None:
|
|
439
|
+
guard_overhead = certificate.get("guard_overhead", {}) or {}
|
|
440
|
+
evaluated = bool(guard_overhead.get("evaluated"))
|
|
441
|
+
if not evaluated:
|
|
442
|
+
return
|
|
443
|
+
ok = bool(
|
|
444
|
+
certificate.get("validation", {}).get("guard_overhead_acceptable", True)
|
|
445
|
+
)
|
|
446
|
+
status = "✅ PASS" if ok else "❌ FAIL"
|
|
447
|
+
overhead_pct = guard_overhead.get("overhead_percent")
|
|
448
|
+
overhead_ratio = guard_overhead.get("overhead_ratio")
|
|
449
|
+
if isinstance(overhead_pct, int | float) and math.isfinite(float(overhead_pct)):
|
|
450
|
+
measured = f"{float(overhead_pct):+.2f}%"
|
|
451
|
+
elif isinstance(overhead_ratio, int | float) and math.isfinite(
|
|
452
|
+
float(overhead_ratio)
|
|
453
|
+
):
|
|
454
|
+
measured = f"{float(overhead_ratio):.3f}x"
|
|
455
|
+
else:
|
|
456
|
+
measured = "N/A"
|
|
457
|
+
threshold_pct = guard_overhead.get("threshold_percent")
|
|
458
|
+
if not (
|
|
459
|
+
isinstance(threshold_pct, int | float)
|
|
460
|
+
and math.isfinite(float(threshold_pct))
|
|
461
|
+
):
|
|
462
|
+
threshold_val = guard_overhead.get("overhead_threshold", 0.01)
|
|
463
|
+
try:
|
|
464
|
+
threshold_pct = float(threshold_val) * 100.0
|
|
465
|
+
except Exception:
|
|
466
|
+
threshold_pct = 1.0
|
|
467
|
+
lines.append(
|
|
468
|
+
f"| Guard Overhead Acceptable | {status} | {measured} | ≤ +{threshold_pct:.1f}% | point | Guarded vs bare PM overhead |"
|
|
469
|
+
)
|
|
470
|
+
|
|
471
|
+
# Emit canonical gate rows
|
|
472
|
+
if has_pm:
|
|
473
|
+
_emit_pm_gate_row()
|
|
474
|
+
_emit_drift_gate_row()
|
|
475
|
+
_emit_overhead_gate_row()
|
|
476
|
+
|
|
477
|
+
# Annotate hysteresis usage if applied
|
|
478
|
+
if certificate.get("validation", {}).get("hysteresis_applied"):
|
|
479
|
+
lines.append("- Note: hysteresis applied to gate boundary")
|
|
480
|
+
|
|
481
|
+
lines.append("")
|
|
482
|
+
lines.append("## Safety Check Details")
|
|
483
|
+
lines.append("")
|
|
484
|
+
lines.append("| Safety Check | Status | Measured | Threshold | Description |")
|
|
485
|
+
lines.append("|--------------|--------|----------|-----------|-------------|")
|
|
486
|
+
|
|
487
|
+
inv_summary = certificate["invariants"]
|
|
488
|
+
validation = certificate.get("validation", {})
|
|
489
|
+
inv_status = "✅ PASS" if validation.get("invariants_pass", False) else "❌ FAIL"
|
|
490
|
+
inv_counts = inv_summary.get("summary", {}) or {}
|
|
491
|
+
inv_measure = inv_summary.get("status", "pass").upper()
|
|
492
|
+
fatal_violations = inv_counts.get("fatal_violations") or 0
|
|
493
|
+
warning_violations = (
|
|
494
|
+
inv_counts.get("warning_violations") or inv_counts.get("violations_found") or 0
|
|
495
|
+
)
|
|
496
|
+
if fatal_violations:
|
|
497
|
+
suffix = f"{fatal_violations} fatal"
|
|
498
|
+
if warning_violations:
|
|
499
|
+
suffix += f", {warning_violations} warning"
|
|
500
|
+
inv_measure = f"{inv_measure} ({suffix})"
|
|
501
|
+
elif warning_violations:
|
|
502
|
+
inv_measure = f"{inv_measure} ({warning_violations} warning)"
|
|
503
|
+
lines.append(
|
|
504
|
+
f"| Invariants | {inv_status} | {inv_measure} | pass | Model integrity checks |"
|
|
505
|
+
)
|
|
506
|
+
invariants_failures = inv_summary.get("failures") or []
|
|
507
|
+
if warning_violations and not fatal_violations:
|
|
508
|
+
non_fatal_message = None
|
|
509
|
+
for failure in invariants_failures:
|
|
510
|
+
if isinstance(failure, dict):
|
|
511
|
+
msg = failure.get("message") or failure.get("type")
|
|
512
|
+
if msg:
|
|
513
|
+
non_fatal_message = msg
|
|
514
|
+
break
|
|
515
|
+
if not non_fatal_message:
|
|
516
|
+
non_fatal_message = "Non-fatal invariant warnings present."
|
|
517
|
+
lines.append(f"- Non-fatal: {non_fatal_message}")
|
|
518
|
+
|
|
519
|
+
spec_status = "✅ PASS" if validation.get("spectral_stable", False) else "❌ FAIL"
|
|
520
|
+
caps_applied = certificate["spectral"]["caps_applied"]
|
|
521
|
+
lines.append(
|
|
522
|
+
f"| Spectral Stability | {spec_status} | {caps_applied} violations | < 5 | Weight matrix spectral norms |"
|
|
523
|
+
)
|
|
524
|
+
|
|
525
|
+
# Catastrophic spike safety stop row is now driven by primary metric flags
|
|
526
|
+
if isinstance(certificate.get("primary_metric"), dict):
|
|
527
|
+
pm_ok = bool(validation.get("primary_metric_acceptable", True))
|
|
528
|
+
pm_ratio = certificate.get("primary_metric", {}).get("ratio_vs_baseline")
|
|
529
|
+
if isinstance(pm_ratio, int | float):
|
|
530
|
+
lines.append(
|
|
531
|
+
f"| Catastrophic Spike Gate (safety stop) | {'✅ PASS' if pm_ok else '❌ FAIL'} | {pm_ratio:.3f}x | ≤ 2.0x | Hard stop @ 2.0× |"
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
# Include RMT Health row for compatibility and clarity
|
|
535
|
+
rmt_status = "✅ PASS" if validation.get("rmt_stable", False) else "❌ FAIL"
|
|
536
|
+
rmt_state = certificate.get("rmt", {}).get("status", "unknown").title()
|
|
537
|
+
lines.append(
|
|
538
|
+
f"| RMT Health | {rmt_status} | {rmt_state} | ε-rule | Random Matrix Theory guard status |"
|
|
539
|
+
)
|
|
540
|
+
|
|
541
|
+
# Pairing + Bootstrap snapshot (quick audit surface)
|
|
542
|
+
try:
|
|
543
|
+
stats = (
|
|
544
|
+
certificate.get("dataset", {}).get("windows", {}).get("stats", {})
|
|
545
|
+
or certificate.get("ppl", {}).get("stats", {})
|
|
546
|
+
or {}
|
|
547
|
+
)
|
|
548
|
+
paired_windows = stats.get("paired_windows")
|
|
549
|
+
match_frac = stats.get("window_match_fraction")
|
|
550
|
+
overlap_frac = stats.get("window_overlap_fraction")
|
|
551
|
+
bootstrap = stats.get("bootstrap") or {}
|
|
552
|
+
if (
|
|
553
|
+
paired_windows is not None
|
|
554
|
+
or match_frac is not None
|
|
555
|
+
or overlap_frac is not None
|
|
556
|
+
):
|
|
557
|
+
lines.append("")
|
|
558
|
+
lines.append(
|
|
559
|
+
f"- Pairing: paired={paired_windows}, match={match_frac:.3f}, overlap={overlap_frac:.3f}"
|
|
560
|
+
)
|
|
561
|
+
if isinstance(bootstrap, dict):
|
|
562
|
+
reps = bootstrap.get("replicates")
|
|
563
|
+
bseed = bootstrap.get("seed")
|
|
564
|
+
if reps is not None or bseed is not None:
|
|
565
|
+
lines.append(f"- Bootstrap: replicates={reps}, seed={bseed}")
|
|
566
|
+
# Optional: show log-space paired Δ CI next to ratio CI for clarity
|
|
567
|
+
delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
|
|
568
|
+
"ppl", {}
|
|
569
|
+
).get("logloss_delta_ci")
|
|
570
|
+
if (
|
|
571
|
+
isinstance(delta_ci, tuple | list)
|
|
572
|
+
and len(delta_ci) == 2
|
|
573
|
+
and all(isinstance(x, int | float) for x in delta_ci)
|
|
574
|
+
):
|
|
575
|
+
lines.append(f"- Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
|
|
576
|
+
except Exception:
|
|
577
|
+
pass
|
|
578
|
+
|
|
579
|
+
if invariants_failures:
|
|
580
|
+
lines.append("")
|
|
581
|
+
lines.append("**Invariant Notes**")
|
|
582
|
+
lines.append("")
|
|
583
|
+
for failure in invariants_failures:
|
|
584
|
+
severity = failure.get("severity", "warning")
|
|
585
|
+
detail = failure.get("detail", {})
|
|
586
|
+
detail_str = ""
|
|
587
|
+
if isinstance(detail, dict) and detail:
|
|
588
|
+
detail_str = ", ".join(f"{k}={v}" for k, v in detail.items())
|
|
589
|
+
detail_str = f" ({detail_str})"
|
|
590
|
+
lines.append(
|
|
591
|
+
f"- {failure.get('check', 'unknown')} [{severity}]: {failure.get('type', 'violation')}{detail_str}"
|
|
592
|
+
)
|
|
593
|
+
|
|
594
|
+
lines.append("")
|
|
595
|
+
|
|
596
|
+
# Guard observability snapshots
|
|
597
|
+
lines.append("## Guard Observability")
|
|
598
|
+
lines.append("")
|
|
599
|
+
|
|
600
|
+
spectral_info = certificate.get("spectral", {}) or {}
|
|
601
|
+
if spectral_info:
|
|
602
|
+
lines.append("### Spectral Guard")
|
|
603
|
+
lines.append("")
|
|
604
|
+
mt_info = spectral_info.get("multiple_testing", {}) or {}
|
|
605
|
+
if mt_info:
|
|
606
|
+
lines.append("- **Multiple Testing:**")
|
|
607
|
+
lines.append(" ```yaml")
|
|
608
|
+
mt_yaml = (
|
|
609
|
+
yaml.safe_dump(mt_info, sort_keys=True, width=70).strip().splitlines()
|
|
610
|
+
)
|
|
611
|
+
for line in mt_yaml:
|
|
612
|
+
lines.append(f" {line}")
|
|
613
|
+
lines.append(" ```")
|
|
614
|
+
# Spectral summary (place key knobs together for quick scan)
|
|
615
|
+
spec_sigma = spectral_info.get("sigma_quantile")
|
|
616
|
+
spec_deadband = spectral_info.get("deadband")
|
|
617
|
+
spec_max_caps = spectral_info.get("max_caps")
|
|
618
|
+
summary_yaml = {
|
|
619
|
+
"sigma_quantile": float(spec_sigma)
|
|
620
|
+
if isinstance(spec_sigma, int | float)
|
|
621
|
+
else None,
|
|
622
|
+
"deadband": float(spec_deadband)
|
|
623
|
+
if isinstance(spec_deadband, int | float)
|
|
624
|
+
else None,
|
|
625
|
+
"max_caps": int(spec_max_caps)
|
|
626
|
+
if isinstance(spec_max_caps, int | float)
|
|
627
|
+
else None,
|
|
628
|
+
}
|
|
629
|
+
# Drop Nones from summary
|
|
630
|
+
summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
|
|
631
|
+
if summary_yaml:
|
|
632
|
+
lines.append("- **Spectral Summary:**")
|
|
633
|
+
lines.append(" ```yaml")
|
|
634
|
+
for line in (
|
|
635
|
+
yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
|
|
636
|
+
.strip()
|
|
637
|
+
.splitlines()
|
|
638
|
+
):
|
|
639
|
+
lines.append(f" {line}")
|
|
640
|
+
lines.append(" ```")
|
|
641
|
+
lines.append(
|
|
642
|
+
f"- Caps Applied: {spectral_info.get('caps_applied')} / {spectral_info.get('max_caps')}"
|
|
643
|
+
)
|
|
644
|
+
summary = spectral_info.get("summary", {}) or {}
|
|
645
|
+
lines.append(f"- Caps Exceeded: {summary.get('caps_exceeded', False)}")
|
|
646
|
+
caps_by_family = spectral_info.get("caps_applied_by_family") or {}
|
|
647
|
+
family_caps = spectral_info.get("family_caps") or {}
|
|
648
|
+
if caps_by_family:
|
|
649
|
+
lines.append("")
|
|
650
|
+
lines.append("| Family | κ | Violations |")
|
|
651
|
+
lines.append("|--------|---|------------|")
|
|
652
|
+
for family, count in caps_by_family.items():
|
|
653
|
+
kappa = family_caps.get(family, {}).get("kappa")
|
|
654
|
+
if isinstance(kappa, int | float) and math.isfinite(float(kappa)):
|
|
655
|
+
kappa_str = f"{kappa:.3f}"
|
|
656
|
+
else:
|
|
657
|
+
kappa_str = "-"
|
|
658
|
+
lines.append(f"| {family} | {kappa_str} | {count} |")
|
|
659
|
+
lines.append("")
|
|
660
|
+
quantiles = spectral_info.get("family_z_quantiles") or {}
|
|
661
|
+
if quantiles:
|
|
662
|
+
lines.append("| Family | q95 | q99 | Max | Samples |")
|
|
663
|
+
lines.append("|--------|-----|-----|-----|---------|")
|
|
664
|
+
for family, stats in quantiles.items():
|
|
665
|
+
q95 = stats.get("q95")
|
|
666
|
+
q99 = stats.get("q99")
|
|
667
|
+
max_z = stats.get("max")
|
|
668
|
+
count = stats.get("count")
|
|
669
|
+
q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
|
|
670
|
+
q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
|
|
671
|
+
max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
|
|
672
|
+
count_str = str(count) if isinstance(count, int | float) else "-"
|
|
673
|
+
lines.append(
|
|
674
|
+
f"| {family} | {q95_str} | {q99_str} | {max_str} | {count_str} |"
|
|
675
|
+
)
|
|
676
|
+
lines.append("")
|
|
677
|
+
policy_caps = spectral_info.get("policy", {}).get("family_caps")
|
|
678
|
+
if policy_caps:
|
|
679
|
+
lines.append("- **Family κ (policy):**")
|
|
680
|
+
lines.append(" ```yaml")
|
|
681
|
+
caps_yaml = (
|
|
682
|
+
yaml.safe_dump(policy_caps, sort_keys=True, width=70)
|
|
683
|
+
.strip()
|
|
684
|
+
.splitlines()
|
|
685
|
+
)
|
|
686
|
+
for line in caps_yaml:
|
|
687
|
+
lines.append(f" {line}")
|
|
688
|
+
lines.append(" ```")
|
|
689
|
+
top_scores = spectral_info.get("top_z_scores") or {}
|
|
690
|
+
if top_scores:
|
|
691
|
+
lines.append("Top |z| per family:")
|
|
692
|
+
for family in sorted(top_scores.keys()):
|
|
693
|
+
entries = top_scores[family]
|
|
694
|
+
if not entries:
|
|
695
|
+
continue
|
|
696
|
+
formatted_entries = []
|
|
697
|
+
for entry in entries:
|
|
698
|
+
module_name = entry.get("module", "unknown")
|
|
699
|
+
z_val = entry.get("z")
|
|
700
|
+
if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
|
|
701
|
+
z_str = f"{z_val:.3f}"
|
|
702
|
+
else:
|
|
703
|
+
z_str = "n/a"
|
|
704
|
+
formatted_entries.append(f"{module_name} (|z|={z_str})")
|
|
705
|
+
lines.append(f"- {family}: {', '.join(formatted_entries)}")
|
|
706
|
+
lines.append("")
|
|
707
|
+
|
|
708
|
+
rmt_info = certificate.get("rmt", {}) or {}
|
|
709
|
+
if rmt_info:
|
|
710
|
+
lines.append("### RMT Guard")
|
|
711
|
+
lines.append("")
|
|
712
|
+
families = rmt_info.get("families") or {}
|
|
713
|
+
if families:
|
|
714
|
+
lines.append("| Family | ε_f | Bare | Guarded | Δ |")
|
|
715
|
+
lines.append("|--------|-----|------|---------|---|")
|
|
716
|
+
for family, data in families.items():
|
|
717
|
+
epsilon_val = data.get("epsilon")
|
|
718
|
+
epsilon_str = (
|
|
719
|
+
f"{epsilon_val:.3f}"
|
|
720
|
+
if isinstance(epsilon_val, int | float)
|
|
721
|
+
else "-"
|
|
722
|
+
)
|
|
723
|
+
bare_count = data.get("bare", 0)
|
|
724
|
+
guarded_count = data.get("guarded", 0)
|
|
725
|
+
delta_val = None
|
|
726
|
+
try:
|
|
727
|
+
bare_str = str(int(bare_count))
|
|
728
|
+
except (TypeError, ValueError):
|
|
729
|
+
bare_str = "-"
|
|
730
|
+
try:
|
|
731
|
+
guarded_str = str(int(guarded_count))
|
|
732
|
+
except (TypeError, ValueError):
|
|
733
|
+
guarded_str = "-"
|
|
734
|
+
try:
|
|
735
|
+
delta_val = int(guarded_count) - int(bare_count) # type: ignore[arg-type]
|
|
736
|
+
except Exception:
|
|
737
|
+
delta_val = None
|
|
738
|
+
delta_str = f"{delta_val:+d}" if isinstance(delta_val, int) else "-"
|
|
739
|
+
lines.append(
|
|
740
|
+
f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
|
|
741
|
+
)
|
|
742
|
+
lines.append("")
|
|
743
|
+
# Delta total and stability flags
|
|
744
|
+
delta_total = rmt_info.get("delta_total")
|
|
745
|
+
if isinstance(delta_total, int):
|
|
746
|
+
lines.append(f"- Δ total: {delta_total:+d}")
|
|
747
|
+
lines.append(f"- Stable: {rmt_info.get('stable', True)}")
|
|
748
|
+
lines.append("")
|
|
749
|
+
|
|
750
|
+
guard_overhead_info = certificate.get("guard_overhead", {}) or {}
|
|
751
|
+
if guard_overhead_info:
|
|
752
|
+
lines.append("### Guard Overhead")
|
|
753
|
+
lines.append("")
|
|
754
|
+
evaluated_flag = bool(guard_overhead_info.get("evaluated", True))
|
|
755
|
+
if not evaluated_flag:
|
|
756
|
+
# Make explicit when overhead was not evaluated by policy/profile
|
|
757
|
+
lines.append("- Evaluated: false (skipped by policy/profile)")
|
|
758
|
+
bare_ppl = guard_overhead_info.get("bare_ppl")
|
|
759
|
+
guarded_ppl = guard_overhead_info.get("guarded_ppl")
|
|
760
|
+
if isinstance(bare_ppl, int | float) and math.isfinite(float(bare_ppl)):
|
|
761
|
+
lines.append(f"- Bare Primary Metric: {bare_ppl:.3f}")
|
|
762
|
+
if isinstance(guarded_ppl, int | float) and math.isfinite(float(guarded_ppl)):
|
|
763
|
+
lines.append(f"- Guarded Primary Metric: {guarded_ppl:.3f}")
|
|
764
|
+
ratio = guard_overhead_info.get("overhead_ratio")
|
|
765
|
+
percent = guard_overhead_info.get("overhead_percent")
|
|
766
|
+
if (
|
|
767
|
+
isinstance(ratio, int | float)
|
|
768
|
+
and math.isfinite(float(ratio))
|
|
769
|
+
and isinstance(percent, int | float)
|
|
770
|
+
and math.isfinite(float(percent))
|
|
771
|
+
):
|
|
772
|
+
lines.append(f"- Overhead: {ratio:.4f}x ({percent:+.2f}%)")
|
|
773
|
+
elif isinstance(ratio, int | float) and math.isfinite(float(ratio)):
|
|
774
|
+
lines.append(f"- Overhead: {ratio:.4f}x")
|
|
775
|
+
overhead_source = guard_overhead_info.get("source")
|
|
776
|
+
if overhead_source:
|
|
777
|
+
lines.append(f"- Source: {overhead_source}")
|
|
778
|
+
plan_ctx = certificate.get("provenance", {}).get("window_plan", {})
|
|
779
|
+
if isinstance(plan_ctx, dict) and plan_ctx:
|
|
780
|
+
plan_preview = (
|
|
781
|
+
plan_ctx.get("preview_n")
|
|
782
|
+
if plan_ctx.get("preview_n") is not None
|
|
783
|
+
else plan_ctx.get("actual_preview")
|
|
784
|
+
)
|
|
785
|
+
plan_final = (
|
|
786
|
+
plan_ctx.get("final_n")
|
|
787
|
+
if plan_ctx.get("final_n") is not None
|
|
788
|
+
else plan_ctx.get("actual_final")
|
|
789
|
+
)
|
|
790
|
+
plan_profile = plan_ctx.get("profile")
|
|
791
|
+
lines.append(
|
|
792
|
+
f"- Window Plan Used: profile={plan_profile}, preview={plan_preview}, final={plan_final}"
|
|
793
|
+
)
|
|
794
|
+
lines.append("")
|
|
795
|
+
|
|
796
|
+
compression_diag = (
|
|
797
|
+
certificate.get("structure", {}).get("compression_diagnostics", {})
|
|
798
|
+
if isinstance(certificate.get("structure"), dict)
|
|
799
|
+
else {}
|
|
800
|
+
)
|
|
801
|
+
inference_flags = compression_diag.get("inferred") or {}
|
|
802
|
+
inference_sources = compression_diag.get("inference_source") or {}
|
|
803
|
+
inference_log = compression_diag.get("inference_log") or []
|
|
804
|
+
if inference_flags or inference_sources or inference_log:
|
|
805
|
+
lines.append("## Inference")
|
|
806
|
+
lines.append("")
|
|
807
|
+
if inference_flags:
|
|
808
|
+
lines.append("- **Fields Inferred:**")
|
|
809
|
+
for field, flag in inference_flags.items():
|
|
810
|
+
lines.append(f" - {field}: {'yes' if flag else 'no'}")
|
|
811
|
+
if inference_sources:
|
|
812
|
+
lines.append("- **Sources:**")
|
|
813
|
+
for field, source in inference_sources.items():
|
|
814
|
+
lines.append(f" - {field}: {source}")
|
|
815
|
+
if inference_log:
|
|
816
|
+
lines.append("- **Inference Log:**")
|
|
817
|
+
for entry in inference_log:
|
|
818
|
+
lines.append(f" - {entry}")
|
|
819
|
+
lines.append("")
|
|
820
|
+
|
|
821
|
+
# Model and Configuration
|
|
822
|
+
lines.append("## Model Information")
|
|
823
|
+
lines.append("")
|
|
824
|
+
meta = certificate["meta"]
|
|
825
|
+
lines.append(f"- **Model ID:** {meta.get('model_id')}")
|
|
826
|
+
lines.append(f"- **Adapter:** {meta.get('adapter')}")
|
|
827
|
+
lines.append(f"- **Device:** {meta.get('device')}")
|
|
828
|
+
lines.append(f"- **Timestamp:** {meta.get('ts')}")
|
|
829
|
+
commit_value = meta.get("commit") or ""
|
|
830
|
+
if commit_value:
|
|
831
|
+
short_sha = str(commit_value)[:12]
|
|
832
|
+
lines.append(f"- **Commit:** {short_sha}")
|
|
833
|
+
else:
|
|
834
|
+
lines.append("- **Commit:** (not set)")
|
|
835
|
+
lines.append(f"- **Seed:** {meta.get('seed')}")
|
|
836
|
+
seeds_map = meta.get("seeds", {})
|
|
837
|
+
if isinstance(seeds_map, dict) and seeds_map:
|
|
838
|
+
lines.append(
|
|
839
|
+
"- **Seeds:** "
|
|
840
|
+
f"python={seeds_map.get('python')}, "
|
|
841
|
+
f"numpy={seeds_map.get('numpy')}, "
|
|
842
|
+
f"torch={seeds_map.get('torch')}"
|
|
843
|
+
)
|
|
844
|
+
invarlock_version = meta.get("invarlock_version")
|
|
845
|
+
if invarlock_version:
|
|
846
|
+
lines.append(f"- **InvarLock Version:** {invarlock_version}")
|
|
847
|
+
env_flags = meta.get("env_flags")
|
|
848
|
+
if isinstance(env_flags, dict) and env_flags:
|
|
849
|
+
lines.append("- **Env Flags:**")
|
|
850
|
+
lines.append(" ```yaml")
|
|
851
|
+
for k, v in env_flags.items():
|
|
852
|
+
lines.append(f" {k}: {v}")
|
|
853
|
+
lines.append(" ```")
|
|
854
|
+
# Determinism flags (if present)
|
|
855
|
+
cuda_flags = meta.get("cuda_flags")
|
|
856
|
+
if isinstance(cuda_flags, dict) and cuda_flags:
|
|
857
|
+
parts = []
|
|
858
|
+
for key in (
|
|
859
|
+
"deterministic_algorithms",
|
|
860
|
+
"cudnn_deterministic",
|
|
861
|
+
"cudnn_benchmark",
|
|
862
|
+
"cudnn_allow_tf32",
|
|
863
|
+
"cuda_matmul_allow_tf32",
|
|
864
|
+
"CUBLAS_WORKSPACE_CONFIG",
|
|
865
|
+
):
|
|
866
|
+
if key in cuda_flags and cuda_flags[key] is not None:
|
|
867
|
+
parts.append(f"{key}={cuda_flags[key]}")
|
|
868
|
+
if parts:
|
|
869
|
+
lines.append(f"- **Determinism Flags:** {', '.join(parts)}")
|
|
870
|
+
lines.append("")
|
|
871
|
+
|
|
872
|
+
# Edit Configuration (removed duplicate Edit Information section)
|
|
873
|
+
|
|
874
|
+
# Auto-tuning Configuration
|
|
875
|
+
auto = certificate["auto"]
|
|
876
|
+
if auto["tier"] != "none":
|
|
877
|
+
lines.append("## Auto-Tuning Configuration")
|
|
878
|
+
lines.append("")
|
|
879
|
+
lines.append(f"- **Tier:** {auto['tier']}")
|
|
880
|
+
lines.append(f"- **Probes Used:** {auto['probes_used']}")
|
|
881
|
+
if auto.get("target_pm_ratio"):
|
|
882
|
+
lines.append(
|
|
883
|
+
f"- **Auto Policy Target Ratio (informational):** {auto['target_pm_ratio']:.3f}"
|
|
884
|
+
)
|
|
885
|
+
# Tiny relax breadcrumb for dev-only demos
|
|
886
|
+
try:
|
|
887
|
+
if bool(auto.get("tiny_relax")):
|
|
888
|
+
lines.append("- Tiny relax: enabled (dev-only)")
|
|
889
|
+
except Exception:
|
|
890
|
+
pass
|
|
891
|
+
lines.append("")
|
|
892
|
+
|
|
893
|
+
resolved_policy = certificate.get("resolved_policy")
|
|
894
|
+
if resolved_policy:
|
|
895
|
+
lines.append("## Resolved Policy")
|
|
896
|
+
lines.append("")
|
|
897
|
+
lines.append("```yaml")
|
|
898
|
+
resolved_yaml = yaml.safe_dump(
|
|
899
|
+
resolved_policy, sort_keys=True, width=80, default_flow_style=False
|
|
900
|
+
).strip()
|
|
901
|
+
for line in resolved_yaml.splitlines():
|
|
902
|
+
lines.append(line)
|
|
903
|
+
lines.append("```")
|
|
904
|
+
lines.append("")
|
|
905
|
+
|
|
906
|
+
policy_provenance = certificate.get("policy_provenance", {})
|
|
907
|
+
if policy_provenance:
|
|
908
|
+
lines.append("## Policy Provenance")
|
|
909
|
+
lines.append("")
|
|
910
|
+
lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
|
|
911
|
+
overrides_list = policy_provenance.get("overrides") or []
|
|
912
|
+
if overrides_list:
|
|
913
|
+
lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
|
|
914
|
+
else:
|
|
915
|
+
lines.append("- **Overrides:** (none)")
|
|
916
|
+
digest_value = policy_provenance.get("policy_digest")
|
|
917
|
+
if digest_value:
|
|
918
|
+
lines.append(f"- **Policy Digest:** `{digest_value}`")
|
|
919
|
+
else:
|
|
920
|
+
lines.append("- **Policy Digest:** (not recorded)")
|
|
921
|
+
if policy_provenance.get("resolved_at"):
|
|
922
|
+
lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
|
|
923
|
+
lines.append("")
|
|
924
|
+
|
|
925
|
+
# Dataset Information
|
|
926
|
+
lines.append("## Dataset Configuration")
|
|
927
|
+
lines.append("")
|
|
928
|
+
dataset = certificate.get("dataset", {}) or {}
|
|
929
|
+
prov = (
|
|
930
|
+
(dataset.get("provider") or "unknown")
|
|
931
|
+
if isinstance(dataset, dict)
|
|
932
|
+
else "unknown"
|
|
933
|
+
)
|
|
934
|
+
lines.append(f"- **Provider:** {prov}")
|
|
935
|
+
try:
|
|
936
|
+
seq_len_val = (
|
|
937
|
+
int(dataset.get("seq_len"))
|
|
938
|
+
if isinstance(dataset.get("seq_len"), int | float)
|
|
939
|
+
else dataset.get("seq_len")
|
|
940
|
+
)
|
|
941
|
+
except Exception: # pragma: no cover - defensive
|
|
942
|
+
seq_len_val = dataset.get("seq_len")
|
|
943
|
+
if seq_len_val is not None:
|
|
944
|
+
lines.append(f"- **Sequence Length:** {seq_len_val}")
|
|
945
|
+
windows_blk = (
|
|
946
|
+
dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
|
|
947
|
+
)
|
|
948
|
+
win_prev = windows_blk.get("preview")
|
|
949
|
+
win_final = windows_blk.get("final")
|
|
950
|
+
if win_prev is not None and win_final is not None:
|
|
951
|
+
lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
|
|
952
|
+
if windows_blk.get("seed") is not None:
|
|
953
|
+
lines.append(f"- **Seed:** {windows_blk.get('seed')}")
|
|
954
|
+
hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
|
|
955
|
+
if hash_blk.get("preview_tokens") is not None:
|
|
956
|
+
lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
|
|
957
|
+
if hash_blk.get("final_tokens") is not None:
|
|
958
|
+
lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
|
|
959
|
+
if hash_blk.get("total_tokens") is not None:
|
|
960
|
+
lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
|
|
961
|
+
if hash_blk.get("dataset"):
|
|
962
|
+
lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
|
|
963
|
+
tokenizer = dataset.get("tokenizer", {})
|
|
964
|
+
if tokenizer.get("name") or tokenizer.get("hash"):
|
|
965
|
+
vocab_size = tokenizer.get("vocab_size")
|
|
966
|
+
vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
|
|
967
|
+
lines.append(
|
|
968
|
+
f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
|
|
969
|
+
)
|
|
970
|
+
if tokenizer.get("hash"):
|
|
971
|
+
lines.append(f" - Hash: {tokenizer['hash']}")
|
|
972
|
+
lines.append(
|
|
973
|
+
f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
|
|
974
|
+
)
|
|
975
|
+
if tokenizer.get("pad_token") is not None:
|
|
976
|
+
lines.append(f" - PAD: {tokenizer.get('pad_token')}")
|
|
977
|
+
if tokenizer.get("add_prefix_space") is not None:
|
|
978
|
+
lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
|
|
979
|
+
lines.append("")
|
|
980
|
+
|
|
981
|
+
provenance_info = certificate.get("provenance", {}) or {}
|
|
982
|
+
if provenance_info:
|
|
983
|
+
lines.append("## Run Provenance")
|
|
984
|
+
lines.append("")
|
|
985
|
+
baseline_info = provenance_info.get("baseline", {}) or {}
|
|
986
|
+
if baseline_info:
|
|
987
|
+
lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
|
|
988
|
+
if baseline_info.get("report_hash"):
|
|
989
|
+
lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
|
|
990
|
+
if baseline_info.get("report_path"):
|
|
991
|
+
lines.append(f" - Report Path: {baseline_info.get('report_path')}")
|
|
992
|
+
edited_info = provenance_info.get("edited", {}) or {}
|
|
993
|
+
if edited_info:
|
|
994
|
+
lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
|
|
995
|
+
if edited_info.get("report_hash"):
|
|
996
|
+
lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
|
|
997
|
+
if edited_info.get("report_path"):
|
|
998
|
+
lines.append(f" - Report Path: {edited_info.get('report_path')}")
|
|
999
|
+
window_plan = provenance_info.get("window_plan")
|
|
1000
|
+
if isinstance(window_plan, dict) and window_plan:
|
|
1001
|
+
preview_val = window_plan.get(
|
|
1002
|
+
"preview_n", window_plan.get("actual_preview")
|
|
1003
|
+
)
|
|
1004
|
+
final_val = window_plan.get("final_n", window_plan.get("actual_final"))
|
|
1005
|
+
lines.append(
|
|
1006
|
+
f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
|
|
1007
|
+
)
|
|
1008
|
+
provider_digest = provenance_info.get("provider_digest")
|
|
1009
|
+
if isinstance(provider_digest, dict) and provider_digest:
|
|
1010
|
+
ids_d = provider_digest.get("ids_sha256")
|
|
1011
|
+
tok_d = provider_digest.get("tokenizer_sha256")
|
|
1012
|
+
mask_d = provider_digest.get("masking_sha256")
|
|
1013
|
+
|
|
1014
|
+
lines.append("- **Provider Digest:**")
|
|
1015
|
+
if tok_d:
|
|
1016
|
+
lines.append(
|
|
1017
|
+
f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
|
|
1018
|
+
)
|
|
1019
|
+
if ids_d:
|
|
1020
|
+
lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
|
|
1021
|
+
if mask_d:
|
|
1022
|
+
lines.append(
|
|
1023
|
+
f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
|
|
1024
|
+
)
|
|
1025
|
+
# Surface confidence label prominently
|
|
1026
|
+
try:
|
|
1027
|
+
conf = certificate.get("confidence", {}) or {}
|
|
1028
|
+
if isinstance(conf, dict) and conf.get("label"):
|
|
1029
|
+
lines.append(f"- **Confidence:** {conf.get('label')}")
|
|
1030
|
+
except Exception:
|
|
1031
|
+
pass
|
|
1032
|
+
# Surface policy version + thresholds hash (short)
|
|
1033
|
+
try:
|
|
1034
|
+
pd = certificate.get("policy_digest", {}) or {}
|
|
1035
|
+
if isinstance(pd, dict) and pd:
|
|
1036
|
+
pv = pd.get("policy_version")
|
|
1037
|
+
th = pd.get("thresholds_hash")
|
|
1038
|
+
if pv:
|
|
1039
|
+
lines.append(f"- **Policy Version:** {pv}")
|
|
1040
|
+
if isinstance(th, str) and th:
|
|
1041
|
+
short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
|
|
1042
|
+
lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
|
|
1043
|
+
if pd.get("changed"):
|
|
1044
|
+
lines.append("- Note: policy changed")
|
|
1045
|
+
except Exception:
|
|
1046
|
+
pass
|
|
1047
|
+
lines.append("")
|
|
1048
|
+
|
|
1049
|
+
# Structural Changes heading is printed with content later; avoid empty header here
|
|
1050
|
+
|
|
1051
|
+
# Primary Metric (metric-v1) snapshot, if present
|
|
1052
|
+
try:
|
|
1053
|
+
pm = certificate.get("primary_metric")
|
|
1054
|
+
if isinstance(pm, dict) and pm:
|
|
1055
|
+
kind = pm.get("kind", "unknown")
|
|
1056
|
+
lines.append(f"## Primary Metric ({kind})")
|
|
1057
|
+
lines.append("")
|
|
1058
|
+
unit = pm.get("unit", "-")
|
|
1059
|
+
paired = pm.get("paired", False)
|
|
1060
|
+
reps = None
|
|
1061
|
+
# Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
|
|
1062
|
+
# Mark estimated metrics (e.g., pseudo accuracy counts) clearly
|
|
1063
|
+
estimated_flag = False
|
|
1064
|
+
try:
|
|
1065
|
+
if bool(pm.get("estimated")):
|
|
1066
|
+
estimated_flag = True
|
|
1067
|
+
elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
|
|
1068
|
+
estimated_flag = True
|
|
1069
|
+
except Exception:
|
|
1070
|
+
estimated_flag = False
|
|
1071
|
+
est_suffix = " (estimated)" if estimated_flag else ""
|
|
1072
|
+
lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
|
|
1073
|
+
gating_basis = pm.get("gating_basis") or pm.get("basis")
|
|
1074
|
+
if gating_basis:
|
|
1075
|
+
lines.append(f"- Basis: {gating_basis}")
|
|
1076
|
+
if isinstance(paired, bool):
|
|
1077
|
+
lines.append(f"- Paired: {paired}")
|
|
1078
|
+
reps = pm.get("reps")
|
|
1079
|
+
if isinstance(reps, int | float):
|
|
1080
|
+
lines.append(f"- Bootstrap Reps: {int(reps)}")
|
|
1081
|
+
ci = pm.get("ci") or pm.get("display_ci")
|
|
1082
|
+
if (
|
|
1083
|
+
isinstance(ci, list | tuple)
|
|
1084
|
+
and len(ci) == 2
|
|
1085
|
+
and all(isinstance(x, int | float) for x in ci)
|
|
1086
|
+
):
|
|
1087
|
+
lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
|
|
1088
|
+
prev = pm.get("preview")
|
|
1089
|
+
fin = pm.get("final")
|
|
1090
|
+
ratio = pm.get("ratio_vs_baseline")
|
|
1091
|
+
|
|
1092
|
+
lines.append("")
|
|
1093
|
+
if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
|
|
1094
|
+
lines.append(
|
|
1095
|
+
"- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
|
|
1096
|
+
)
|
|
1097
|
+
lines.append("| Field | Value |")
|
|
1098
|
+
lines.append("|-------|-------|")
|
|
1099
|
+
lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
|
|
1100
|
+
lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
|
|
1101
|
+
# For accuracy, ratio field is actually a delta (as per helper); clarify inline
|
|
1102
|
+
if kind in {"accuracy", "vqa_accuracy"}:
|
|
1103
|
+
lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
|
|
1104
|
+
# When baseline accuracy is near-zero, clarify display rule
|
|
1105
|
+
try:
|
|
1106
|
+
base_pt = pm.get("baseline_point")
|
|
1107
|
+
if isinstance(base_pt, int | float) and base_pt < 0.05:
|
|
1108
|
+
lines.append(
|
|
1109
|
+
"- Note: baseline < 5%; ratio suppressed; showing Δpp"
|
|
1110
|
+
)
|
|
1111
|
+
except Exception:
|
|
1112
|
+
pass
|
|
1113
|
+
else:
|
|
1114
|
+
try:
|
|
1115
|
+
lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
|
|
1116
|
+
except Exception:
|
|
1117
|
+
lines.append("| Ratio vs Baseline | N/A |")
|
|
1118
|
+
lines.append("")
|
|
1119
|
+
# Secondary metrics (informational)
|
|
1120
|
+
try:
|
|
1121
|
+
secs = certificate.get("secondary_metrics")
|
|
1122
|
+
if isinstance(secs, list) and secs:
|
|
1123
|
+
lines.append("## Secondary Metrics (informational)")
|
|
1124
|
+
lines.append("")
|
|
1125
|
+
lines.append("| Kind | Preview | Final | vs Baseline | CI |")
|
|
1126
|
+
lines.append("|------|---------|-------|-------------|----|")
|
|
1127
|
+
for m in secs:
|
|
1128
|
+
if not isinstance(m, dict):
|
|
1129
|
+
continue
|
|
1130
|
+
k = m.get("kind", "?")
|
|
1131
|
+
pv = _fmt_by_kind(m.get("preview"), str(k))
|
|
1132
|
+
fv = _fmt_by_kind(m.get("final"), str(k))
|
|
1133
|
+
rb = m.get("ratio_vs_baseline")
|
|
1134
|
+
try:
|
|
1135
|
+
rb_str = (
|
|
1136
|
+
f"{float(rb):.3f}"
|
|
1137
|
+
if (str(k).startswith("ppl"))
|
|
1138
|
+
else _fmt_by_kind(rb, str(k))
|
|
1139
|
+
)
|
|
1140
|
+
except Exception:
|
|
1141
|
+
rb_str = "N/A"
|
|
1142
|
+
ci = m.get("display_ci") or m.get("ci")
|
|
1143
|
+
if isinstance(ci, tuple | list) and len(ci) == 2:
|
|
1144
|
+
ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
|
|
1145
|
+
else:
|
|
1146
|
+
ci_str = "–"
|
|
1147
|
+
lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
|
|
1148
|
+
lines.append("")
|
|
1149
|
+
except Exception:
|
|
1150
|
+
pass
|
|
1151
|
+
except Exception:
|
|
1152
|
+
pass
|
|
1153
|
+
|
|
1154
|
+
# System Overhead section (latency/throughput)
|
|
1155
|
+
sys_over = certificate.get("system_overhead", {}) or {}
|
|
1156
|
+
if isinstance(sys_over, dict) and sys_over:
|
|
1157
|
+
_append_system_overhead_section(lines, sys_over)
|
|
1158
|
+
|
|
1159
|
+
# Accuracy Subgroups (informational)
|
|
1160
|
+
try:
|
|
1161
|
+
cls = certificate.get("classification", {})
|
|
1162
|
+
sub = cls.get("subgroups") if isinstance(cls, dict) else None
|
|
1163
|
+
if isinstance(sub, dict) and sub:
|
|
1164
|
+
_append_accuracy_subgroups(lines, sub)
|
|
1165
|
+
except Exception:
|
|
1166
|
+
pass
|
|
1167
|
+
# Structural Changes
|
|
1168
|
+
try:
|
|
1169
|
+
structure = certificate.get("structure", {}) or {}
|
|
1170
|
+
params_changed = int(structure.get("params_changed", 0) or 0)
|
|
1171
|
+
layers_modified = int(structure.get("layers_modified", 0) or 0)
|
|
1172
|
+
bitwidth_changes = 0
|
|
1173
|
+
try:
|
|
1174
|
+
bitwidth_changes = int(len(structure.get("bitwidths", []) or []))
|
|
1175
|
+
except Exception:
|
|
1176
|
+
bitwidth_changes = 0
|
|
1177
|
+
# Decide whether to show the section
|
|
1178
|
+
has_changes = any(
|
|
1179
|
+
v > 0 for v in (params_changed, layers_modified, bitwidth_changes)
|
|
1180
|
+
)
|
|
1181
|
+
edit_name = str(certificate.get("edit_name", "unknown"))
|
|
1182
|
+
if has_changes:
|
|
1183
|
+
lines.append("## Structural Changes")
|
|
1184
|
+
lines.append("")
|
|
1185
|
+
lines.append("| Change Type | Count |")
|
|
1186
|
+
lines.append("|-------------|-------|")
|
|
1187
|
+
lines.append(f"| Parameters Changed | {params_changed:,} |")
|
|
1188
|
+
if edit_name == "quant_rtn":
|
|
1189
|
+
# For quantization: prefer a single clear line reconciling target vs applied
|
|
1190
|
+
# using diagnostics when available. Fallback to bitwidth-change count.
|
|
1191
|
+
try:
|
|
1192
|
+
t_an = (structure.get("compression_diagnostics", {}) or {}).get(
|
|
1193
|
+
"target_analysis", {}
|
|
1194
|
+
)
|
|
1195
|
+
except Exception:
|
|
1196
|
+
t_an = {}
|
|
1197
|
+
eligible = None
|
|
1198
|
+
modified = None
|
|
1199
|
+
if isinstance(t_an, dict) and t_an:
|
|
1200
|
+
eligible = t_an.get("modules_eligible")
|
|
1201
|
+
modified = t_an.get("modules_modified")
|
|
1202
|
+
if isinstance(modified, int) and isinstance(eligible, int):
|
|
1203
|
+
lines.append(
|
|
1204
|
+
f"| Linear Modules Quantized | {modified} of {eligible} targeted |"
|
|
1205
|
+
)
|
|
1206
|
+
else:
|
|
1207
|
+
total_bitwidth_changes = bitwidth_changes
|
|
1208
|
+
if total_bitwidth_changes > 0 and layers_modified > 0:
|
|
1209
|
+
modules_per_layer = total_bitwidth_changes // max(
|
|
1210
|
+
layers_modified, 1
|
|
1211
|
+
)
|
|
1212
|
+
lines.append(
|
|
1213
|
+
f"| Linear Modules Quantized | {total_bitwidth_changes} ({modules_per_layer} per block × {layers_modified} blocks) |"
|
|
1214
|
+
)
|
|
1215
|
+
elif total_bitwidth_changes > 0:
|
|
1216
|
+
lines.append(
|
|
1217
|
+
f"| Linear Modules Quantized | {total_bitwidth_changes} |"
|
|
1218
|
+
)
|
|
1219
|
+
else:
|
|
1220
|
+
lines.append(f"| Layers Modified | {layers_modified} |")
|
|
1221
|
+
lines.append("")
|
|
1222
|
+
except Exception:
|
|
1223
|
+
# Best-effort; omit section on error
|
|
1224
|
+
pass
|
|
1225
|
+
|
|
1226
|
+
# Add detailed breakdowns if available
|
|
1227
|
+
if structure.get("bitwidths") and edit_name != "quant_rtn":
|
|
1228
|
+
lines.append(f"| Bit-width Changes | {len(structure['bitwidths'])} layers |")
|
|
1229
|
+
if structure.get("ranks"):
|
|
1230
|
+
lines.append(f"| Rank Changes | {len(structure['ranks'])} layers |")
|
|
1231
|
+
|
|
1232
|
+
lines.append("")
|
|
1233
|
+
|
|
1234
|
+
# Compression Diagnostics
|
|
1235
|
+
compression_diag = structure.get("compression_diagnostics", {})
|
|
1236
|
+
if edit_name == "noop":
|
|
1237
|
+
lines.append("### Compression Diagnostics")
|
|
1238
|
+
lines.append("")
|
|
1239
|
+
lines.append("Not applicable (no parameters modified).")
|
|
1240
|
+
lines.append("")
|
|
1241
|
+
elif compression_diag:
|
|
1242
|
+
lines.append("### Compression Diagnostics")
|
|
1243
|
+
lines.append("")
|
|
1244
|
+
|
|
1245
|
+
# Algorithm execution status
|
|
1246
|
+
status = compression_diag.get("execution_status", "unknown")
|
|
1247
|
+
status_emoji = (
|
|
1248
|
+
"✅" if status == "successful" else "❌" if status == "failed" else "⚠️"
|
|
1249
|
+
)
|
|
1250
|
+
lines.append(f"**Execution Status:** {status_emoji} {status.upper()}")
|
|
1251
|
+
lines.append("")
|
|
1252
|
+
|
|
1253
|
+
# Target module analysis
|
|
1254
|
+
target_analysis = compression_diag.get("target_analysis", {})
|
|
1255
|
+
if target_analysis:
|
|
1256
|
+
lines.append("**Target Module Analysis:**")
|
|
1257
|
+
lines.append("")
|
|
1258
|
+
lines.append("| Metric | Value |")
|
|
1259
|
+
lines.append("|--------|-------|")
|
|
1260
|
+
lines.append(
|
|
1261
|
+
f"| Modules Found | {target_analysis.get('modules_found', 0)} |"
|
|
1262
|
+
)
|
|
1263
|
+
lines.append(
|
|
1264
|
+
f"| Modules Eligible | {target_analysis.get('modules_eligible', 0)} |"
|
|
1265
|
+
)
|
|
1266
|
+
lines.append(
|
|
1267
|
+
f"| Modules Modified | {target_analysis.get('modules_modified', 0)} |"
|
|
1268
|
+
)
|
|
1269
|
+
try:
|
|
1270
|
+
_eligible = int(target_analysis.get("modules_eligible", 0))
|
|
1271
|
+
_modified = int(target_analysis.get("modules_modified", 0))
|
|
1272
|
+
lines.append(f"| Targets → Applied | {_eligible} → {_modified} |")
|
|
1273
|
+
except Exception:
|
|
1274
|
+
pass
|
|
1275
|
+
lines.append(f"| Scope | {target_analysis.get('scope', 'unknown')} |")
|
|
1276
|
+
lines.append("")
|
|
1277
|
+
|
|
1278
|
+
# Parameter effectiveness
|
|
1279
|
+
param_analysis = compression_diag.get("parameter_analysis", {})
|
|
1280
|
+
if param_analysis:
|
|
1281
|
+
lines.append("**Parameter Effectiveness:**")
|
|
1282
|
+
lines.append("")
|
|
1283
|
+
for param, info in param_analysis.items():
|
|
1284
|
+
if isinstance(info, dict):
|
|
1285
|
+
lines.append(
|
|
1286
|
+
f"- **{param}:** {info.get('value', 'N/A')} ({info.get('effectiveness', 'unknown')})"
|
|
1287
|
+
)
|
|
1288
|
+
else:
|
|
1289
|
+
lines.append(f"- **{param}:** {info}")
|
|
1290
|
+
lines.append("")
|
|
1291
|
+
|
|
1292
|
+
# Algorithm-specific details
|
|
1293
|
+
algo_details = compression_diag.get("algorithm_details", {})
|
|
1294
|
+
if algo_details:
|
|
1295
|
+
lines.append("**Algorithm Details:**")
|
|
1296
|
+
lines.append("")
|
|
1297
|
+
for key, value in algo_details.items():
|
|
1298
|
+
lines.append(f"- **{key}:** {value}")
|
|
1299
|
+
lines.append("")
|
|
1300
|
+
|
|
1301
|
+
# Informational recommendations (non-normative)
|
|
1302
|
+
warnings = compression_diag.get("warnings", [])
|
|
1303
|
+
if warnings:
|
|
1304
|
+
lines.append("**ℹ️ Informational:**")
|
|
1305
|
+
lines.append("")
|
|
1306
|
+
for warning in warnings:
|
|
1307
|
+
lines.append(f"- {warning}")
|
|
1308
|
+
lines.append("")
|
|
1309
|
+
|
|
1310
|
+
# Variance Guard (Spectral/RMT summaries are already provided above)
|
|
1311
|
+
variance = certificate["variance"]
|
|
1312
|
+
lines.append("## Variance Guard")
|
|
1313
|
+
|
|
1314
|
+
# Display whether VE was enabled after A/B test
|
|
1315
|
+
lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
|
|
1316
|
+
|
|
1317
|
+
if variance["enabled"]:
|
|
1318
|
+
# VE was enabled - show the gain
|
|
1319
|
+
gain_value = variance.get("gain", "N/A")
|
|
1320
|
+
if isinstance(gain_value, int | float):
|
|
1321
|
+
lines.append(f"- **Gain:** {gain_value:.3f}")
|
|
1322
|
+
else:
|
|
1323
|
+
lines.append(f"- **Gain:** {gain_value}")
|
|
1324
|
+
else:
|
|
1325
|
+
# VE was not enabled - show succinct reason if available, else a clear disabled message
|
|
1326
|
+
ppl_no_ve = variance.get("ppl_no_ve")
|
|
1327
|
+
ppl_with_ve = variance.get("ppl_with_ve")
|
|
1328
|
+
ratio_ci = variance.get("ratio_ci")
|
|
1329
|
+
if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
|
|
1330
|
+
lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
|
|
1331
|
+
lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
|
|
1332
|
+
gain_value = variance.get("gain")
|
|
1333
|
+
if isinstance(gain_value, int | float):
|
|
1334
|
+
lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
|
|
1335
|
+
else:
|
|
1336
|
+
lines.append(
|
|
1337
|
+
"- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
|
|
1338
|
+
)
|
|
1339
|
+
# Add concise rationale aligned with Balanced predictive gate contract
|
|
1340
|
+
try:
|
|
1341
|
+
ve_policy = certificate.get("policies", {}).get("variance", {})
|
|
1342
|
+
min_effect = ve_policy.get("min_effect_lognll")
|
|
1343
|
+
if isinstance(min_effect, int | float):
|
|
1344
|
+
lines.append(
|
|
1345
|
+
f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
|
|
1346
|
+
)
|
|
1347
|
+
else:
|
|
1348
|
+
lines.append(
|
|
1349
|
+
"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
|
|
1350
|
+
)
|
|
1351
|
+
lines.append(
|
|
1352
|
+
"- Predictive Gate: evaluated=false (disabled under current policy/edit)."
|
|
1353
|
+
)
|
|
1354
|
+
except Exception:
|
|
1355
|
+
pass
|
|
1356
|
+
|
|
1357
|
+
if variance.get("ratio_ci"):
|
|
1358
|
+
ratio_lo, ratio_hi = variance["ratio_ci"]
|
|
1359
|
+
lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
|
|
1360
|
+
|
|
1361
|
+
if variance.get("calibration") and variance.get("enabled"):
|
|
1362
|
+
calib = variance["calibration"]
|
|
1363
|
+
coverage = calib.get("coverage")
|
|
1364
|
+
requested = calib.get("requested")
|
|
1365
|
+
status = calib.get("status", "unknown")
|
|
1366
|
+
lines.append(f"- **Calibration:** {coverage}/{requested} windows ({status})")
|
|
1367
|
+
|
|
1368
|
+
lines.append("")
|
|
1369
|
+
|
|
1370
|
+
# MoE Observability (non-gating)
|
|
1371
|
+
moe = certificate.get("moe", {}) if isinstance(certificate.get("moe"), dict) else {}
|
|
1372
|
+
if moe:
|
|
1373
|
+
lines.append("## MoE Observability")
|
|
1374
|
+
lines.append("")
|
|
1375
|
+
# Core router fields
|
|
1376
|
+
for key in ("top_k", "capacity_factor", "expert_drop_rate"):
|
|
1377
|
+
if key in moe:
|
|
1378
|
+
lines.append(f"- **{key}:** {moe[key]}")
|
|
1379
|
+
# Utilization summary
|
|
1380
|
+
if "utilization_count" in moe or "utilization_mean" in moe:
|
|
1381
|
+
uc = moe.get("utilization_count")
|
|
1382
|
+
um = moe.get("utilization_mean")
|
|
1383
|
+
parts = []
|
|
1384
|
+
if uc is not None:
|
|
1385
|
+
parts.append(f"N={int(uc)}")
|
|
1386
|
+
if isinstance(um, int | float):
|
|
1387
|
+
parts.append(f"mean={um:.3f}")
|
|
1388
|
+
if parts:
|
|
1389
|
+
lines.append(f"- **Utilization:** {'; '.join(parts)}")
|
|
1390
|
+
# Delta summaries when available
|
|
1391
|
+
for key, label in (
|
|
1392
|
+
("delta_load_balance_loss", "Δ load_balance_loss"),
|
|
1393
|
+
("delta_router_entropy", "Δ router_entropy"),
|
|
1394
|
+
("delta_utilization_mean", "Δ utilization mean"),
|
|
1395
|
+
):
|
|
1396
|
+
if key in moe and isinstance(moe.get(key), int | float):
|
|
1397
|
+
lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
|
|
1398
|
+
lines.append("")
|
|
1399
|
+
|
|
1400
|
+
# Policy Summary
|
|
1401
|
+
lines.append("## Applied Policies")
|
|
1402
|
+
lines.append("")
|
|
1403
|
+
policies = certificate["policies"]
|
|
1404
|
+
for guard_name, policy in policies.items():
|
|
1405
|
+
lines.append(f"### {guard_name.title()}")
|
|
1406
|
+
lines.append("")
|
|
1407
|
+
policy_yaml = (
|
|
1408
|
+
yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
|
|
1409
|
+
)
|
|
1410
|
+
lines.append("```yaml")
|
|
1411
|
+
for line in policy_yaml:
|
|
1412
|
+
lines.append(line)
|
|
1413
|
+
lines.append("```")
|
|
1414
|
+
lines.append("")
|
|
1415
|
+
|
|
1416
|
+
# Artifacts
|
|
1417
|
+
lines.append("## Artifacts")
|
|
1418
|
+
lines.append("")
|
|
1419
|
+
artifacts = certificate["artifacts"]
|
|
1420
|
+
if artifacts.get("events_path"):
|
|
1421
|
+
lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
|
|
1422
|
+
if artifacts.get("report_path"):
|
|
1423
|
+
lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
|
|
1424
|
+
lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
|
|
1425
|
+
lines.append("")
|
|
1426
|
+
|
|
1427
|
+
# Certificate Hash for Integrity
|
|
1428
|
+
cert_hash = _compute_certificate_hash(certificate)
|
|
1429
|
+
lines.append("## Certificate Integrity")
|
|
1430
|
+
lines.append("")
|
|
1431
|
+
lines.append(f"**Certificate Hash:** `{cert_hash}`")
|
|
1432
|
+
lines.append("")
|
|
1433
|
+
lines.append("---")
|
|
1434
|
+
lines.append("")
|
|
1435
|
+
lines.append(
|
|
1436
|
+
"*This InvarLock safety certificate provides a comprehensive assessment of model compression safety.*"
|
|
1437
|
+
)
|
|
1438
|
+
lines.append(
|
|
1439
|
+
"*All metrics are compared against the uncompressed baseline model for safety validation.*"
|
|
1440
|
+
)
|
|
1441
|
+
|
|
1442
|
+
return "\n".join(lines)
|