invarlock-0.2.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +33 -0
- invarlock/__main__.py +10 -0
- invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
- invarlock/_data/runtime/profiles/release.yaml +23 -0
- invarlock/_data/runtime/tiers.yaml +76 -0
- invarlock/adapters/__init__.py +102 -0
- invarlock/adapters/_capabilities.py +45 -0
- invarlock/adapters/auto.py +99 -0
- invarlock/adapters/base.py +530 -0
- invarlock/adapters/base_types.py +85 -0
- invarlock/adapters/hf_bert.py +852 -0
- invarlock/adapters/hf_gpt2.py +403 -0
- invarlock/adapters/hf_llama.py +485 -0
- invarlock/adapters/hf_mixin.py +383 -0
- invarlock/adapters/hf_onnx.py +112 -0
- invarlock/adapters/hf_t5.py +137 -0
- invarlock/adapters/py.typed +1 -0
- invarlock/assurance/__init__.py +43 -0
- invarlock/cli/__init__.py +8 -0
- invarlock/cli/__main__.py +8 -0
- invarlock/cli/_evidence.py +25 -0
- invarlock/cli/_json.py +75 -0
- invarlock/cli/adapter_auto.py +162 -0
- invarlock/cli/app.py +287 -0
- invarlock/cli/commands/__init__.py +26 -0
- invarlock/cli/commands/certify.py +403 -0
- invarlock/cli/commands/doctor.py +1358 -0
- invarlock/cli/commands/explain_gates.py +151 -0
- invarlock/cli/commands/export_html.py +100 -0
- invarlock/cli/commands/plugins.py +1331 -0
- invarlock/cli/commands/report.py +354 -0
- invarlock/cli/commands/run.py +4146 -0
- invarlock/cli/commands/verify.py +1040 -0
- invarlock/cli/config.py +396 -0
- invarlock/cli/constants.py +68 -0
- invarlock/cli/device.py +92 -0
- invarlock/cli/doctor_helpers.py +74 -0
- invarlock/cli/errors.py +6 -0
- invarlock/cli/overhead_utils.py +60 -0
- invarlock/cli/provenance.py +66 -0
- invarlock/cli/utils.py +41 -0
- invarlock/config.py +56 -0
- invarlock/core/__init__.py +62 -0
- invarlock/core/abi.py +15 -0
- invarlock/core/api.py +274 -0
- invarlock/core/auto_tuning.py +317 -0
- invarlock/core/bootstrap.py +226 -0
- invarlock/core/checkpoint.py +221 -0
- invarlock/core/contracts.py +73 -0
- invarlock/core/error_utils.py +64 -0
- invarlock/core/events.py +298 -0
- invarlock/core/exceptions.py +95 -0
- invarlock/core/registry.py +481 -0
- invarlock/core/retry.py +146 -0
- invarlock/core/runner.py +2041 -0
- invarlock/core/types.py +154 -0
- invarlock/edits/__init__.py +12 -0
- invarlock/edits/_edit_utils.py +249 -0
- invarlock/edits/_external_utils.py +268 -0
- invarlock/edits/noop.py +47 -0
- invarlock/edits/py.typed +1 -0
- invarlock/edits/quant_rtn.py +801 -0
- invarlock/edits/registry.py +166 -0
- invarlock/eval/__init__.py +23 -0
- invarlock/eval/bench.py +1207 -0
- invarlock/eval/bootstrap.py +50 -0
- invarlock/eval/data.py +2052 -0
- invarlock/eval/metrics.py +2167 -0
- invarlock/eval/primary_metric.py +767 -0
- invarlock/eval/probes/__init__.py +24 -0
- invarlock/eval/probes/fft.py +139 -0
- invarlock/eval/probes/mi.py +213 -0
- invarlock/eval/probes/post_attention.py +323 -0
- invarlock/eval/providers/base.py +67 -0
- invarlock/eval/providers/seq2seq.py +111 -0
- invarlock/eval/providers/text_lm.py +113 -0
- invarlock/eval/providers/vision_text.py +93 -0
- invarlock/eval/py.typed +1 -0
- invarlock/guards/__init__.py +18 -0
- invarlock/guards/_contracts.py +9 -0
- invarlock/guards/invariants.py +640 -0
- invarlock/guards/policies.py +805 -0
- invarlock/guards/py.typed +1 -0
- invarlock/guards/rmt.py +2097 -0
- invarlock/guards/spectral.py +1419 -0
- invarlock/guards/tier_config.py +354 -0
- invarlock/guards/variance.py +3298 -0
- invarlock/guards_ref/__init__.py +15 -0
- invarlock/guards_ref/rmt_ref.py +40 -0
- invarlock/guards_ref/spectral_ref.py +135 -0
- invarlock/guards_ref/variance_ref.py +60 -0
- invarlock/model_profile.py +353 -0
- invarlock/model_utils.py +221 -0
- invarlock/observability/__init__.py +10 -0
- invarlock/observability/alerting.py +535 -0
- invarlock/observability/core.py +546 -0
- invarlock/observability/exporters.py +565 -0
- invarlock/observability/health.py +588 -0
- invarlock/observability/metrics.py +457 -0
- invarlock/observability/py.typed +1 -0
- invarlock/observability/utils.py +553 -0
- invarlock/plugins/__init__.py +12 -0
- invarlock/plugins/hello_guard.py +33 -0
- invarlock/plugins/hf_awq_adapter.py +82 -0
- invarlock/plugins/hf_bnb_adapter.py +79 -0
- invarlock/plugins/hf_gptq_adapter.py +78 -0
- invarlock/plugins/py.typed +1 -0
- invarlock/py.typed +1 -0
- invarlock/reporting/__init__.py +7 -0
- invarlock/reporting/certificate.py +3221 -0
- invarlock/reporting/certificate_schema.py +244 -0
- invarlock/reporting/dataset_hashing.py +215 -0
- invarlock/reporting/guards_analysis.py +948 -0
- invarlock/reporting/html.py +32 -0
- invarlock/reporting/normalizer.py +235 -0
- invarlock/reporting/policy_utils.py +517 -0
- invarlock/reporting/primary_metric_utils.py +265 -0
- invarlock/reporting/render.py +1442 -0
- invarlock/reporting/report.py +903 -0
- invarlock/reporting/report_types.py +278 -0
- invarlock/reporting/utils.py +175 -0
- invarlock/reporting/validate.py +631 -0
- invarlock/security.py +176 -0
- invarlock/sparsity_utils.py +323 -0
- invarlock/utils/__init__.py +150 -0
- invarlock/utils/digest.py +45 -0
- invarlock-0.2.0.dist-info/METADATA +586 -0
- invarlock-0.2.0.dist-info/RECORD +132 -0
- invarlock-0.2.0.dist-info/WHEEL +5 -0
- invarlock-0.2.0.dist-info/entry_points.txt +20 -0
- invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
- invarlock-0.2.0.dist-info/top_level.txt +1 -0
invarlock/reporting/report.py
@@ -0,0 +1,903 @@
"""
InvarLock Unified Report Generation
==============================

Generate comprehensive reports from RunReport data in multiple formats.
Supports side-by-side comparison for bare vs guarded edit analysis.
"""

from __future__ import annotations

import html
import json
from datetime import datetime
from pathlib import Path
from typing import Any, cast

from invarlock.cli._evidence import maybe_dump_guard_evidence

from .certificate import (
    make_certificate,
)
from .normalizer import normalize_run_report
from .render import render_certificate_markdown
from .report_types import RunReport, validate_report


def to_json(report: RunReport, indent: int = 2) -> str:
    """
    Convert RunReport to formatted JSON string.

    Args:
        report: RunReport to convert
        indent: JSON indentation level

    Returns:
        Formatted JSON string
    """
    if not validate_report(report):
        raise ValueError("Invalid RunReport structure")

    # Create a clean copy for JSON serialization
    json_data = dict(report)

    # Ensure all values are JSON serializable
    json_data = _sanitize_for_json(json_data)

    return json.dumps(json_data, indent=indent, ensure_ascii=False)


def to_markdown(
    report: RunReport | dict[str, Any],
    compare: RunReport | dict[str, Any] | None = None,
    title: str | None = None,
) -> str:
    """
    Convert RunReport to Markdown format with optional comparison.

    Args:
        report: Primary RunReport to convert
        compare: Optional second report for side-by-side comparison
        title: Optional title for the report

    Returns:
        Formatted Markdown string
    """
    # Normalize external dicts to canonical RunReport
    rp: RunReport = normalize_run_report(report) if isinstance(report, dict) else report
    cmp: RunReport | None = (
        normalize_run_report(compare) if isinstance(compare, dict) else compare
    )

    if not validate_report(rp):
        raise ValueError("Invalid primary RunReport structure")
    if cmp and not validate_report(cmp):
        raise ValueError("Invalid comparison RunReport structure")

    lines = []

    # Title
    if title:
        lines.append(f"# {title}")
    elif compare:
        lines.append("# InvarLock Evaluation Report Comparison")
    else:
        lines.append("# InvarLock Evaluation Report")

    lines.append("")
    lines.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}*")
    lines.append("")

    if cmp:
        lines.extend(_generate_comparison_markdown(rp, cmp))
    else:
        lines.extend(_generate_single_markdown(rp))

    return "\n".join(lines)


def to_html(
    report: RunReport | dict[str, Any],
    compare: RunReport | dict[str, Any] | None = None,
    title: str | None = None,
    include_css: bool = True,
) -> str:
    """
    Convert RunReport to HTML format with optional comparison.

    Args:
        report: Primary RunReport to convert
        compare: Optional second report for side-by-side comparison
        title: Optional title for the report
        include_css: Whether to include CSS styling

    Returns:
        Formatted HTML string
    """
    rp: RunReport = normalize_run_report(report) if isinstance(report, dict) else report
    cmp: RunReport | None = (
        normalize_run_report(compare) if isinstance(compare, dict) else compare
    )
    if not validate_report(rp):
        raise ValueError("Invalid primary RunReport structure")
    if cmp and not validate_report(cmp):
        raise ValueError("Invalid comparison RunReport structure")

    html_title = html.escape(
        title
        or ("InvarLock Report Comparison" if compare else "InvarLock Evaluation Report")
    )

    html_parts = [
        "<!DOCTYPE html>",
        "<html lang='en'>",
        "<head>",
        " <meta charset='UTF-8'>",
        " <meta name='viewport' content='width=device-width, initial-scale=1.0'>",
        f" <title>{html_title}</title>",
    ]

    if include_css:
        html_parts.append(_get_default_css())

    html_parts.extend(
        [
            "</head>",
            "<body>",
            " <div class='container'>",
            f" <h1>{html_title}</h1>",
            f" <p class='timestamp'>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}</p>",
        ]
    )

    if cmp:
        html_parts.extend(_generate_comparison_html(rp, cmp))
    else:
        html_parts.extend(_generate_single_html(rp))

    html_parts.extend([" </div>", "</body>", "</html>"])

    return "\n".join(html_parts)


def to_certificate(report: RunReport, baseline: RunReport, format: str = "json") -> str:
    """
    Convert RunReport to certificate format.

    Args:
        report: Primary RunReport to certify
        baseline: Baseline RunReport for comparison
        format: Output format ("json" or "markdown")

    Returns:
        Formatted certificate string
    """
    if not validate_report(report):
        raise ValueError("Invalid primary RunReport structure")

    if not _validate_baseline_or_report(baseline):
        raise ValueError("Invalid baseline RunReport structure")

    # Generate certificate
    certificate = make_certificate(report, baseline)

    if format == "json":
        return json.dumps(certificate, indent=2, ensure_ascii=False)
    elif format == "markdown":
        return render_certificate_markdown(certificate)
    else:
        raise ValueError(f"Unsupported certificate format: {format}")


def save_report(
    report: RunReport,
    output_dir: str | Path,
    formats: list[str] | None = None,
    compare: RunReport | None = None,
    baseline: RunReport | None = None,
    filename_prefix: str = "report",
) -> dict[str, Path]:
    """
    Save RunReport in multiple formats to a directory.

    Args:
        report: RunReport to save
        output_dir: Directory to save reports in
        formats: List of formats to generate ("json", "markdown", "html", "cert")
        compare: Optional comparison report
        baseline: Optional baseline report for certificate generation
        filename_prefix: Prefix for generated filenames

    Returns:
        Dictionary mapping format names to generated file paths
    """
    if formats is None:
        formats = ["json", "markdown", "html"]
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    saved_files = {}

    suffix = "_comparison" if compare else ""

    if "json" in formats:
        json_path = output_path / f"{filename_prefix}{suffix}.json"
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(to_json(report))
        saved_files["json"] = json_path

    if "markdown" in formats:
        md_path = output_path / f"{filename_prefix}{suffix}.md"
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(to_markdown(report, compare))
        saved_files["markdown"] = md_path

    if "html" in formats:
        html_path = output_path / f"{filename_prefix}{suffix}.html"
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(to_html(report, compare))
        saved_files["html"] = html_path

    if "cert" in formats:
        if baseline is None:
            raise ValueError("Baseline report required for certificate generation")

        # Generate certificate JSON in canonical path/name
        cert_json_path = output_path / "evaluation.cert.json"
        with open(cert_json_path, "w", encoding="utf-8") as f:
            f.write(to_certificate(report, baseline, format="json"))
        saved_files["cert"] = cert_json_path

        # Also emit a markdown variant for human consumption
        cert_md_path = output_path / f"{filename_prefix}_certificate.md"
        with open(cert_md_path, "w", encoding="utf-8") as f:
            f.write(to_certificate(report, baseline, format="markdown"))
        saved_files["cert_md"] = cert_md_path

        # Emit a lightweight manifest to serve as an evidence bundle index
        try:
            from datetime import datetime as _dt

            manifest = {
                "generated_at": _dt.now().isoformat(),
                "files": {
                    "certificate_json": str(cert_json_path),
                    "certificate_markdown": str(cert_md_path),
                },
                "summary": {
                    "run_model": (report.get("meta", {}) or {}).get("model_id"),
                    "device": (report.get("meta", {}) or {}).get("device"),
                    "seed": (report.get("meta", {}) or {}).get("seed"),
                },
            }
            # Write debug evidence (tiny) when requested via env
            guard_payload = {}
            try:
                guard_ctx = report.get("guards") or []
            except Exception:
                guard_ctx = []
            if isinstance(guard_ctx, list) and guard_ctx:
                tiny: list[dict] = []
                for g in guard_ctx:
                    if isinstance(g, dict):
                        entry: dict[str, object] = {}
                        pol = g.get("policy") or {}
                        if isinstance(pol, dict):
                            for k in (
                                "deadband",
                                "min_effect_lognll",
                                "max_caps",
                                "sigma_quantile",
                            ):
                                if k in pol:
                                    entry[k] = pol[k]
                        if g.get("name"):
                            entry["name"] = g.get("name")
                        if entry:
                            tiny.append(entry)
                if tiny:
                    guard_payload = {"guards_decisions": tiny}
            # The helper will no-op unless INVARLOCK_EVIDENCE_DEBUG=1
            if guard_payload:
                maybe_dump_guard_evidence(output_path, guard_payload)
            else:
                maybe_dump_guard_evidence(output_path, {"guards_decisions": []})

            ev_file = Path(output_path) / "guards_evidence.json"
            if ev_file.exists():
                manifest["evidence"] = {"guards_evidence": str(ev_file)}

            (output_path / "manifest.json").write_text(
                json.dumps(manifest, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            saved_files["manifest"] = output_path / "manifest.json"
        except Exception:
            # Non-fatal; manifest is best-effort
            pass

    return saved_files


# ── Private helper functions ──────────────────────────────────────────────


def _validate_baseline_or_report(baseline: RunReport | dict[str, Any]) -> bool:
    """
    Validate that a baseline is either a valid RunReport or a valid baseline format.

    Args:
        baseline: Baseline data to validate

    Returns:
        True if valid, False otherwise
    """
    # First try to validate as a RunReport
    if isinstance(baseline, dict) and validate_report(cast(RunReport, baseline)):
        return True

    # If not a RunReport, check if it's a valid baseline format
    try:
        # Check for baseline schema (v1 only)
        if isinstance(baseline, dict):
            schema_version = baseline.get("schema_version")
            if schema_version in ["baseline-v1"]:
                # Validate required baseline fields
                required_keys = {"meta", "metrics"}
                if all(key in baseline for key in required_keys):
                    # Baseline must include primary_metric with at least a final value
                    metrics = baseline.get("metrics", {})
                    pm = (
                        metrics.get("primary_metric")
                        if isinstance(metrics, dict)
                        else None
                    )
                    if isinstance(pm, dict) and (pm.get("final") is not None):
                        return True
        return False
    except (KeyError, TypeError):
        return False


def _sanitize_for_json(obj: Any) -> Any:
    """Recursively sanitize data for JSON serialization."""
    if isinstance(obj, dict):
        return {k: _sanitize_for_json(v) for k, v in obj.items()}
    elif isinstance(obj, list | tuple):
        return [_sanitize_for_json(item) for item in obj]
    elif isinstance(obj, int | float | str | bool | type(None)):
        return obj
    elif hasattr(obj, "isoformat"):  # datetime
        return obj.isoformat()
    else:
        return str(obj)


def _generate_single_markdown(report: RunReport) -> list[str]:
    """Generate markdown for a single report."""
    lines = []

    # Executive Summary
    lines.append("## Executive Summary")
    lines.append("")
    lines.append(f"- **Model**: {report['meta']['model_id']}")
    lines.append(f"- **Edit**: {report['edit']['name']}")
    # Primary metric (canonical)
    pm = (
        report.get("metrics", {}).get("primary_metric")
        if isinstance(report.get("metrics"), dict)
        else None
    )
    if isinstance(pm, dict) and pm:
        kind = str(pm.get("kind") or "primary")
        prev = pm.get("preview")
        fin = pm.get("final")
        ratio = pm.get("ratio_vs_baseline")
        parts = [f"- **Primary Metric** ({kind})"]
        if isinstance(prev, int | float):
            parts.append(f"preview={prev:.3f}")
        if isinstance(fin, int | float):
            parts.append(f"final={fin:.3f}")
        if isinstance(ratio, int | float):
            parts.append(f"ratio_vs_baseline={ratio:.3f}")
        lines.append(" — ".join(parts) if len(parts) > 1 else parts[0])
    else:
        # When primary_metric is absent, do not attempt legacy fallbacks
        lines.append("- **Primary Metric**: unavailable")
    lines.append(
        f"- **Parameters Changed**: {report['edit']['deltas']['params_changed']:,}"
    )
    lines.append(
        f"- **Latency**: {report['metrics']['latency_ms_per_tok']:.2f} ms/token"
    )
    lines.append(f"- **Memory**: {report['metrics']['memory_mb_peak']:.1f} MB")
    lines.append("")

    # Model Information
    lines.append("## Model Information")
    lines.append("")
    lines.append(f"- **Model ID**: {report['meta']['model_id']}")
    lines.append(f"- **Adapter**: {report['meta']['adapter']}")
    lines.append(f"- **Device**: {report['meta']['device']}")
    lines.append(f"- **Commit**: {report['meta']['commit'][:8]}...")
    lines.append(f"- **Timestamp**: {report['meta']['ts']}")
    lines.append("")

    # Evaluation Data
    lines.append("## Evaluation Configuration")
    lines.append("")
    lines.append(f"- **Dataset**: {report['data']['dataset']}")
    lines.append(f"- **Split**: {report['data']['split']}")
    lines.append(f"- **Sequence Length**: {report['data']['seq_len']}")
    lines.append(f"- **Preview Samples**: {report['data']['preview_n']}")
    lines.append(f"- **Final Samples**: {report['data']['final_n']}")
    lines.append("")

    # Performance Metrics
    if isinstance(pm, dict) and pm:
        lines.append("## Primary Metric")
        lines.append("")
        lines.append("| Kind | Preview | Final | Ratio vs Baseline |")
        lines.append("|------|---------|-------|-------------------|")
        kind = str(pm.get("kind") or "primary")
        prev = pm.get("preview")
        fin = pm.get("final")
        ratio = pm.get("ratio_vs_baseline")

        def _fmt(x):
            return f"{x:.3f}" if isinstance(x, int | float) else "N/A"

        lines.append(f"| {kind} | {_fmt(prev)} | {_fmt(fin)} | {_fmt(ratio)} |")
        lines.append("")
        # Append system metrics
        lines.append("## System Metrics")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        lines.append(
            f"| Latency (ms/token) | {report['metrics']['latency_ms_per_tok']:.2f} |"
        )
        lines.append(
            f"| Peak Memory (MB) | {report['metrics']['memory_mb_peak']:.1f} |"
        )
        lines.append("")
    else:
        lines.append("## Performance Metrics")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        # Primary metric block is unavailable; show system metrics only
        lines.append(
            f"| Latency (ms/token) | {report['metrics']['latency_ms_per_tok']:.2f} |"
        )
        lines.append(
            f"| Peak Memory (MB) | {report['metrics']['memory_mb_peak']:.1f} |"
        )
        lines.append("")

    # Edit Details
    lines.append("## Edit Details")
    lines.append("")
    lines.append(f"- **Edit Type**: {report['edit']['name']}")
    lines.append(f"- **Plan Digest**: `{report['edit']['plan_digest'][:16]}...`")
    lines.append("")
    lines.append("### Parameter Changes")
    lines.append("")
    deltas = report["edit"]["deltas"]
    lines.append("| Change Type | Count |")
    lines.append("|-------------|-------|")
    lines.append(f"| Parameters Changed | {deltas['params_changed']:,} |")
    lines.append(f"| Layers Modified | {deltas['layers_modified']} |")
    if deltas["sparsity"] is not None:
        lines.append(f"| Overall Sparsity | {deltas['sparsity']:.3f} |")
    lines.append("")

    # Guard Reports
    if report["guards"]:
        lines.append("## Guard Reports")
        lines.append("")
        for guard in report["guards"]:
            lines.append(f"### {guard['name']}")
            lines.append("")

            # Guard metrics
            if guard["metrics"]:
                lines.append("**Metrics:**")
                for metric, value in guard["metrics"].items():
                    lines.append(f"- {metric}: {value}")
                lines.append("")

            # Actions taken
            if guard["actions"]:
                lines.append("**Actions:**")
                for action in guard["actions"]:
                    lines.append(f"- {action}")
                lines.append("")

            # Violations
            if guard["violations"]:
                lines.append("**Violations:**")
                for violation in guard["violations"]:
                    lines.append(f"- ⚠️ {violation}")
                lines.append("")

    # Status Flags
    lines.append("## Status")
    lines.append("")

    # Determine overall status
    guards_passed = all(
        len(guard.get("violations", [])) == 0 for guard in report["guards"]
    )
    has_rollback = report["flags"]["rollback_reason"] is not None
    guard_recovery = report["flags"]["guard_recovered"]

    if has_rollback:
        lines.append(f"- 🔄 **ROLLBACK**: {report['flags']['rollback_reason']}")
        lines.append("- ❌ Pipeline did not complete successfully")
    elif guard_recovery:
        lines.append("- ✅ Guard recovery was triggered")
        lines.append("- ⚠️ Some guards detected issues but were resolved")
    elif guards_passed:
        lines.append("- ✅ **SUCCESS**: Pipeline completed successfully")
        lines.append(f"- 🛡️ All {len(report['guards'])} guards passed validation")
        lines.append("- 📊 Model modifications were approved and finalized")
    else:
        lines.append("- ⚠️ Some guards reported violations")
        lines.append("- 🔍 Review guard reports above for details")

    # Add performance summary based on primary metric
    metrics_map_sum = cast(dict[str, Any], dict(report["metrics"]))
    pm_sum = (
        metrics_map_sum.get("primary_metric")
        if isinstance(metrics_map_sum, dict)
        else None
    )
    ratio_val = None
    if isinstance(pm_sum, dict):
        rv = pm_sum.get("ratio_vs_baseline")
        if isinstance(rv, int | float):
            ratio_val = float(rv)
    params_changed = report["edit"]["deltas"]["params_changed"]

    lines.append("")
    lines.append("### Summary")
    lines.append(f"- **Parameters Modified**: {params_changed:,}")
    if isinstance(ratio_val, float):
        lines.append(f"- **Performance Impact**: PM ratio {ratio_val:.3f}")

    if params_changed > 0 and isinstance(ratio_val, float):
        impact = (
            "significant"
            if ratio_val > 1.1
            else "minimal"
            if ratio_val < 1.05
            else "moderate"
        )
        lines.append(
            f"- **Assessment**: {impact.title()} model changes with {impact} performance impact"
        )

    return lines


def _generate_comparison_markdown(report1: RunReport, report2: RunReport) -> list[str]:
    """Generate side-by-side comparison markdown."""
    lines = []

    # Comparison Summary
    lines.append("## Comparison Summary")
    lines.append("")
    lines.append("| Metric | Report 1 | Report 2 | Delta |")
    lines.append("|--------|----------|----------|-------|")

    # Compare primary metric (final) when present in both reports
    pm1 = (
        report1.get("metrics", {}).get("primary_metric")
        if isinstance(report1.get("metrics"), dict)
        else None
    )
    pm2 = (
        report2.get("metrics", {}).get("primary_metric")
        if isinstance(report2.get("metrics"), dict)
        else None
    )
    if isinstance(pm1, dict) and isinstance(pm2, dict):
        k1 = str(pm1.get("kind") or "primary")
        k2 = str(pm2.get("kind") or "primary")
        label = f"Primary Metric ({k1})" if k1 == k2 else "Primary Metric"
        f1 = pm1.get("final")
        f2 = pm2.get("final")
        if isinstance(f1, int | float) and isinstance(f2, int | float):
            delta = f2 - f1
            sym = "📈" if delta > 0 else "📉" if delta < 0 else "➡️"
            lines.append(f"| {label} | {f1:.3f} | {f2:.3f} | {sym} {delta:+.3f} |")
    # If primary metrics are missing, omit the comparison row rather than falling back

    # Latency comparison
    lat1 = report1["metrics"]["latency_ms_per_tok"]
    lat2 = report2["metrics"]["latency_ms_per_tok"]
    lat_delta = lat2 - lat1
    lat_symbol = "📈" if lat_delta > 0 else "📉" if lat_delta < 0 else "➡️"
    lines.append(
        f"| Latency (ms/tok) | {lat1:.2f} | {lat2:.2f} | {lat_symbol} {lat_delta:+.2f} |"
    )

    # Memory comparison
    mem1 = report1["metrics"]["memory_mb_peak"]
    mem2 = report2["metrics"]["memory_mb_peak"]
    mem_delta = mem2 - mem1
    mem_symbol = "📈" if mem_delta > 0 else "📉" if mem_delta < 0 else "➡️"
    lines.append(
        f"| Memory (MB) | {mem1:.1f} | {mem2:.1f} | {mem_symbol} {mem_delta:+.1f} |"
    )

    lines.append("")

    # Side-by-side details
    lines.append("## Detailed Comparison")
    lines.append("")
    lines.append("### Model Information")
    lines.append("")
    lines.append("| Aspect | Report 1 | Report 2 |")
    lines.append("|--------|----------|----------|")
    lines.append(
        f"| Model | {report1['meta']['model_id']} | {report2['meta']['model_id']} |"
    )
    lines.append(f"| Edit | {report1['edit']['name']} | {report2['edit']['name']} |")
    lines.append(
        f"| Device | {report1['meta']['device']} | {report2['meta']['device']} |"
    )
    lines.append("")

    # Parameter changes comparison
    lines.append("### Parameter Changes")
    lines.append("")
    lines.append("| Change Type | Report 1 | Report 2 | Delta |")
    lines.append("|-------------|----------|----------|-------|")

    delta1 = report1["edit"]["deltas"]
    delta2 = report2["edit"]["deltas"]

    for key in ["params_changed", "layers_modified"]:
        val1_obj = delta1.get(key, 0)
        val2_obj = delta2.get(key, 0)
        if isinstance(val1_obj, int | float | str):
            try:
                val1_i = int(val1_obj)
            except Exception:
                val1_i = 0
        else:
            val1_i = 0
        if isinstance(val2_obj, int | float | str):
            try:
                val2_i = int(val2_obj)
            except Exception:
                val2_i = 0
        else:
            val2_i = 0
        diff = val2_i - val1_i
        diff_str = f"{diff:+,}" if key == "params_changed" else f"{diff:+d}"
        lines.append(
            f"| {key.replace('_', ' ').title()} | {val1_i:,} | {val2_i:,} | {diff_str} |"
        )

    lines.append("")

    # Guard comparison
    if report1["guards"] or report2["guards"]:
        lines.append("### Guard Reports")
        lines.append("")

        # Get all guard names
        guard_names1 = {g["name"] for g in report1["guards"]}
        guard_names2 = {g["name"] for g in report2["guards"]}
        all_guards = sorted(guard_names1 | guard_names2)

        for guard_name in all_guards:
            lines.append(f"#### {guard_name}")
            lines.append("")

            guard1 = next(
                (g for g in report1["guards"] if g["name"] == guard_name), None
            )
            guard2 = next(
                (g for g in report2["guards"] if g["name"] == guard_name), None
            )

            status1 = "Present" if guard1 else "Absent"
            status2 = "Present" if guard2 else "Absent"

            lines.append(f"- **Report 1**: {status1}")
            lines.append(f"- **Report 2**: {status2}")

            if guard1 and guard2:
                if guard1["violations"] or guard2["violations"]:
                    lines.append("")
                    lines.append("**Violations:**")
                    lines.append(f"- Report 1: {len(guard1['violations'])} violations")
                    lines.append(f"- Report 2: {len(guard2['violations'])} violations")

            lines.append("")

    return lines


def _generate_single_html(report: RunReport) -> list[str]:
    """Generate HTML for a single report."""
    lines = []

    # Convert markdown to HTML structure
    md_lines = _generate_single_markdown(report)

    # This is a simplified conversion - in a full implementation,
    # you might use a proper markdown-to-HTML converter
    lines.append(" <div class='report-content'>")

    in_table = False
    for line in md_lines:
        line = line.strip()

        if line.startswith("# "):
            lines.append(f" <h1>{html.escape(line[2:])}</h1>")
        elif line.startswith("## "):
            lines.append(f" <h2>{html.escape(line[3:])}</h2>")
        elif line.startswith("### "):
            lines.append(f" <h3>{html.escape(line[4:])}</h3>")
        elif line.startswith("| ") and "|" in line[1:]:
            if not in_table:
                lines.append(" <table class='metrics-table'>")
                in_table = True

            cells = [cell.strip() for cell in line.split("|")[1:-1]]
            if all(cell.startswith("-") for cell in cells):
                continue  # Skip separator row

            if cells[0] in ["Metric", "Change Type", "Aspect"]:
                lines.append(" <thead><tr>")
                for cell in cells:
                    lines.append(f" <th>{html.escape(cell)}</th>")
                lines.append(" </tr></thead><tbody>")
            else:
                lines.append(" <tr>")
                for cell in cells:
                    lines.append(f" <td>{html.escape(cell)}</td>")
                lines.append(" </tr>")
        elif line.startswith("- "):
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
            lines.append(f" <li>{html.escape(line[2:])}</li>")
        elif line == "":
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
        else:
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
            if line:
                lines.append(f" <p>{html.escape(line)}</p>")

    if in_table:
        lines.append(" </tbody></table>")

    lines.append(" </div>")

    return lines


def _generate_comparison_html(report1: RunReport, report2: RunReport) -> list[str]:
    """Generate HTML for comparison reports."""
    lines = []

    # Similar to single report but with comparison layout
    md_lines = _generate_comparison_markdown(report1, report2)

    lines.append(" <div class='comparison-content'>")

    # Convert markdown lines to HTML (simplified)
    in_table = False
    for line in md_lines:
        line = line.strip()

        if line.startswith("## "):
            lines.append(f" <h2>{html.escape(line[3:])}</h2>")
        elif line.startswith("### "):
            lines.append(f" <h3>{html.escape(line[4:])}</h3>")
        elif line.startswith("| ") and "|" in line[1:]:
            if not in_table:
                lines.append(" <table class='comparison-table'>")
                in_table = True

            cells = [cell.strip() for cell in line.split("|")[1:-1]]
            if all(cell.startswith("-") for cell in cells):
                continue

            if "Metric" in cells[0] or "Aspect" in cells[0]:
                lines.append(" <thead><tr>")
                for cell in cells:
                    lines.append(f" <th>{html.escape(cell)}</th>")
                lines.append(" </tr></thead><tbody>")
            else:
                lines.append(" <tr>")
                for cell in cells:
                    lines.append(f" <td>{html.escape(cell)}</td>")
                lines.append(" </tr>")
        elif line == "":
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
        else:
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
            if line:
                lines.append(f" <p>{html.escape(line)}</p>")

    if in_table:
        lines.append(" </tbody></table>")

    lines.append(" </div>")

    return lines


def _get_default_css() -> str:
    """Get default CSS styling for HTML reports."""
    return """ <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f8f9fa;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
        h2 { color: #34495e; border-bottom: 1px solid #bdc3c7; padding-bottom: 5px; margin-top: 30px; }
        h3 { color: #7f8c8d; margin-top: 25px; }
        .timestamp { color: #95a5a6; font-style: italic; margin-bottom: 30px; }
        .metrics-table, .comparison-table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
            background: white;
        }
        .metrics-table th, .metrics-table td,
        .comparison-table th, .comparison-table td {
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #ecf0f1;
        }
        .metrics-table th, .comparison-table th {
            background-color: #3498db;
            color: white;
            font-weight: 600;
        }
        .metrics-table tr:hover, .comparison-table tr:hover {
            background-color: #f8f9fa;
        }
        li { margin: 5px 0; }
        code {
            background: #f1f2f6;
            padding: 2px 6px;
            border-radius: 3px;
            font-family: 'Monaco', 'Consolas', monospace;
        }
        .comparison-content { display: block; }
        @media (max-width: 768px) {
            .container { padding: 15px; }
            .metrics-table, .comparison-table { font-size: 14px; }
        }
    </style>"""


# Export public API
__all__ = ["to_json", "to_markdown", "to_html", "to_certificate", "save_report"]
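
For orientation, a minimal usage sketch of the public API exported above. This is illustrative only and not part of the packaged file: the run.json and baseline.json paths and the reports/ output directory are assumptions, and the loaded dicts are expected to already satisfy the RunReport and baseline-v1 shapes that validate_report and _validate_baseline_or_report check.

# Illustrative sketch only; assumes run.json / baseline.json contain report
# dicts in the RunReport / baseline-v1 shapes expected by this module.
import json
from pathlib import Path

from invarlock.reporting.report import save_report, to_markdown

run = json.loads(Path("run.json").read_text(encoding="utf-8"))
baseline = json.loads(Path("baseline.json").read_text(encoding="utf-8"))

# Render a single-run Markdown report to stdout.
print(to_markdown(run, title="Guarded quantization run"))

# Write JSON/Markdown/HTML views plus the certificate pair and manifest
# into ./reports; the returned dict maps format names to file paths.
paths = save_report(
    run,
    output_dir="reports",
    formats=["json", "markdown", "html", "cert"],
    baseline=baseline,
)
for kind, path in paths.items():
    print(f"{kind}: {path}")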