invarlock-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. invarlock/__init__.py +33 -0
  2. invarlock/__main__.py +10 -0
  3. invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
  4. invarlock/_data/runtime/profiles/release.yaml +23 -0
  5. invarlock/_data/runtime/tiers.yaml +76 -0
  6. invarlock/adapters/__init__.py +102 -0
  7. invarlock/adapters/_capabilities.py +45 -0
  8. invarlock/adapters/auto.py +99 -0
  9. invarlock/adapters/base.py +530 -0
  10. invarlock/adapters/base_types.py +85 -0
  11. invarlock/adapters/hf_bert.py +852 -0
  12. invarlock/adapters/hf_gpt2.py +403 -0
  13. invarlock/adapters/hf_llama.py +485 -0
  14. invarlock/adapters/hf_mixin.py +383 -0
  15. invarlock/adapters/hf_onnx.py +112 -0
  16. invarlock/adapters/hf_t5.py +137 -0
  17. invarlock/adapters/py.typed +1 -0
  18. invarlock/assurance/__init__.py +43 -0
  19. invarlock/cli/__init__.py +8 -0
  20. invarlock/cli/__main__.py +8 -0
  21. invarlock/cli/_evidence.py +25 -0
  22. invarlock/cli/_json.py +75 -0
  23. invarlock/cli/adapter_auto.py +162 -0
  24. invarlock/cli/app.py +287 -0
  25. invarlock/cli/commands/__init__.py +26 -0
  26. invarlock/cli/commands/certify.py +403 -0
  27. invarlock/cli/commands/doctor.py +1358 -0
  28. invarlock/cli/commands/explain_gates.py +151 -0
  29. invarlock/cli/commands/export_html.py +100 -0
  30. invarlock/cli/commands/plugins.py +1331 -0
  31. invarlock/cli/commands/report.py +354 -0
  32. invarlock/cli/commands/run.py +4146 -0
  33. invarlock/cli/commands/verify.py +1040 -0
  34. invarlock/cli/config.py +396 -0
  35. invarlock/cli/constants.py +68 -0
  36. invarlock/cli/device.py +92 -0
  37. invarlock/cli/doctor_helpers.py +74 -0
  38. invarlock/cli/errors.py +6 -0
  39. invarlock/cli/overhead_utils.py +60 -0
  40. invarlock/cli/provenance.py +66 -0
  41. invarlock/cli/utils.py +41 -0
  42. invarlock/config.py +56 -0
  43. invarlock/core/__init__.py +62 -0
  44. invarlock/core/abi.py +15 -0
  45. invarlock/core/api.py +274 -0
  46. invarlock/core/auto_tuning.py +317 -0
  47. invarlock/core/bootstrap.py +226 -0
  48. invarlock/core/checkpoint.py +221 -0
  49. invarlock/core/contracts.py +73 -0
  50. invarlock/core/error_utils.py +64 -0
  51. invarlock/core/events.py +298 -0
  52. invarlock/core/exceptions.py +95 -0
  53. invarlock/core/registry.py +481 -0
  54. invarlock/core/retry.py +146 -0
  55. invarlock/core/runner.py +2041 -0
  56. invarlock/core/types.py +154 -0
  57. invarlock/edits/__init__.py +12 -0
  58. invarlock/edits/_edit_utils.py +249 -0
  59. invarlock/edits/_external_utils.py +268 -0
  60. invarlock/edits/noop.py +47 -0
  61. invarlock/edits/py.typed +1 -0
  62. invarlock/edits/quant_rtn.py +801 -0
  63. invarlock/edits/registry.py +166 -0
  64. invarlock/eval/__init__.py +23 -0
  65. invarlock/eval/bench.py +1207 -0
  66. invarlock/eval/bootstrap.py +50 -0
  67. invarlock/eval/data.py +2052 -0
  68. invarlock/eval/metrics.py +2167 -0
  69. invarlock/eval/primary_metric.py +767 -0
  70. invarlock/eval/probes/__init__.py +24 -0
  71. invarlock/eval/probes/fft.py +139 -0
  72. invarlock/eval/probes/mi.py +213 -0
  73. invarlock/eval/probes/post_attention.py +323 -0
  74. invarlock/eval/providers/base.py +67 -0
  75. invarlock/eval/providers/seq2seq.py +111 -0
  76. invarlock/eval/providers/text_lm.py +113 -0
  77. invarlock/eval/providers/vision_text.py +93 -0
  78. invarlock/eval/py.typed +1 -0
  79. invarlock/guards/__init__.py +18 -0
  80. invarlock/guards/_contracts.py +9 -0
  81. invarlock/guards/invariants.py +640 -0
  82. invarlock/guards/policies.py +805 -0
  83. invarlock/guards/py.typed +1 -0
  84. invarlock/guards/rmt.py +2097 -0
  85. invarlock/guards/spectral.py +1419 -0
  86. invarlock/guards/tier_config.py +354 -0
  87. invarlock/guards/variance.py +3298 -0
  88. invarlock/guards_ref/__init__.py +15 -0
  89. invarlock/guards_ref/rmt_ref.py +40 -0
  90. invarlock/guards_ref/spectral_ref.py +135 -0
  91. invarlock/guards_ref/variance_ref.py +60 -0
  92. invarlock/model_profile.py +353 -0
  93. invarlock/model_utils.py +221 -0
  94. invarlock/observability/__init__.py +10 -0
  95. invarlock/observability/alerting.py +535 -0
  96. invarlock/observability/core.py +546 -0
  97. invarlock/observability/exporters.py +565 -0
  98. invarlock/observability/health.py +588 -0
  99. invarlock/observability/metrics.py +457 -0
  100. invarlock/observability/py.typed +1 -0
  101. invarlock/observability/utils.py +553 -0
  102. invarlock/plugins/__init__.py +12 -0
  103. invarlock/plugins/hello_guard.py +33 -0
  104. invarlock/plugins/hf_awq_adapter.py +82 -0
  105. invarlock/plugins/hf_bnb_adapter.py +79 -0
  106. invarlock/plugins/hf_gptq_adapter.py +78 -0
  107. invarlock/plugins/py.typed +1 -0
  108. invarlock/py.typed +1 -0
  109. invarlock/reporting/__init__.py +7 -0
  110. invarlock/reporting/certificate.py +3221 -0
  111. invarlock/reporting/certificate_schema.py +244 -0
  112. invarlock/reporting/dataset_hashing.py +215 -0
  113. invarlock/reporting/guards_analysis.py +948 -0
  114. invarlock/reporting/html.py +32 -0
  115. invarlock/reporting/normalizer.py +235 -0
  116. invarlock/reporting/policy_utils.py +517 -0
  117. invarlock/reporting/primary_metric_utils.py +265 -0
  118. invarlock/reporting/render.py +1442 -0
  119. invarlock/reporting/report.py +903 -0
  120. invarlock/reporting/report_types.py +278 -0
  121. invarlock/reporting/utils.py +175 -0
  122. invarlock/reporting/validate.py +631 -0
  123. invarlock/security.py +176 -0
  124. invarlock/sparsity_utils.py +323 -0
  125. invarlock/utils/__init__.py +150 -0
  126. invarlock/utils/digest.py +45 -0
  127. invarlock-0.2.0.dist-info/METADATA +586 -0
  128. invarlock-0.2.0.dist-info/RECORD +132 -0
  129. invarlock-0.2.0.dist-info/WHEEL +5 -0
  130. invarlock-0.2.0.dist-info/entry_points.txt +20 -0
  131. invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
  132. invarlock-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,903 @@
"""
InvarLock Unified Report Generation
===================================

Generate comprehensive reports from RunReport data in multiple formats.
Supports side-by-side comparison for bare vs guarded edit analysis.
"""

from __future__ import annotations

import html
import json
from datetime import datetime
from pathlib import Path
from typing import Any, cast

from invarlock.cli._evidence import maybe_dump_guard_evidence

from .certificate import (
    make_certificate,
)
from .normalizer import normalize_run_report
from .render import render_certificate_markdown
from .report_types import RunReport, validate_report


def to_json(report: RunReport, indent: int = 2) -> str:
    """
    Convert RunReport to formatted JSON string.

    Args:
        report: RunReport to convert
        indent: JSON indentation level

    Returns:
        Formatted JSON string
    """
    if not validate_report(report):
        raise ValueError("Invalid RunReport structure")

    # Create a clean copy for JSON serialization
    json_data = dict(report)

    # Ensure all values are JSON serializable
    json_data = _sanitize_for_json(json_data)

    return json.dumps(json_data, indent=indent, ensure_ascii=False)


def to_markdown(
    report: RunReport | dict[str, Any],
    compare: RunReport | dict[str, Any] | None = None,
    title: str | None = None,
) -> str:
    """
    Convert RunReport to Markdown format with optional comparison.

    Args:
        report: Primary RunReport to convert
        compare: Optional second report for side-by-side comparison
        title: Optional title for the report

    Returns:
        Formatted Markdown string
    """
    # Normalize external dicts to canonical RunReport
    rp: RunReport = normalize_run_report(report) if isinstance(report, dict) else report
    cmp: RunReport | None = (
        normalize_run_report(compare) if isinstance(compare, dict) else compare
    )

    if not validate_report(rp):
        raise ValueError("Invalid primary RunReport structure")
    if cmp and not validate_report(cmp):
        raise ValueError("Invalid comparison RunReport structure")

    lines = []

    # Title
    if title:
        lines.append(f"# {title}")
    elif compare:
        lines.append("# InvarLock Evaluation Report Comparison")
    else:
        lines.append("# InvarLock Evaluation Report")

    lines.append("")
    lines.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}*")
    lines.append("")

    if cmp:
        lines.extend(_generate_comparison_markdown(rp, cmp))
    else:
        lines.extend(_generate_single_markdown(rp))

    return "\n".join(lines)


def to_html(
    report: RunReport | dict[str, Any],
    compare: RunReport | dict[str, Any] | None = None,
    title: str | None = None,
    include_css: bool = True,
) -> str:
    """
    Convert RunReport to HTML format with optional comparison.

    Args:
        report: Primary RunReport to convert
        compare: Optional second report for side-by-side comparison
        title: Optional title for the report
        include_css: Whether to include CSS styling

    Returns:
        Formatted HTML string
    """
    rp: RunReport = normalize_run_report(report) if isinstance(report, dict) else report
    cmp: RunReport | None = (
        normalize_run_report(compare) if isinstance(compare, dict) else compare
    )
    if not validate_report(rp):
        raise ValueError("Invalid primary RunReport structure")
    if cmp and not validate_report(cmp):
        raise ValueError("Invalid comparison RunReport structure")

    html_title = html.escape(
        title
        or ("InvarLock Report Comparison" if compare else "InvarLock Evaluation Report")
    )

    html_parts = [
        "<!DOCTYPE html>",
        "<html lang='en'>",
        "<head>",
        " <meta charset='UTF-8'>",
        " <meta name='viewport' content='width=device-width, initial-scale=1.0'>",
        f" <title>{html_title}</title>",
    ]

    if include_css:
        html_parts.append(_get_default_css())

    html_parts.extend(
        [
            "</head>",
            "<body>",
            " <div class='container'>",
            f" <h1>{html_title}</h1>",
            f" <p class='timestamp'>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}</p>",
        ]
    )

    if cmp:
        html_parts.extend(_generate_comparison_html(rp, cmp))
    else:
        html_parts.extend(_generate_single_html(rp))

    html_parts.extend([" </div>", "</body>", "</html>"])

    return "\n".join(html_parts)


def to_certificate(report: RunReport, baseline: RunReport, format: str = "json") -> str:
    """
    Convert RunReport to certificate format.

    Args:
        report: Primary RunReport to certify
        baseline: Baseline RunReport for comparison
        format: Output format ("json" or "markdown")

    Returns:
        Formatted certificate string
    """
    if not validate_report(report):
        raise ValueError("Invalid primary RunReport structure")

    if not _validate_baseline_or_report(baseline):
        raise ValueError("Invalid baseline RunReport structure")

    # Generate certificate
    certificate = make_certificate(report, baseline)

    if format == "json":
        return json.dumps(certificate, indent=2, ensure_ascii=False)
    elif format == "markdown":
        return render_certificate_markdown(certificate)
    else:
        raise ValueError(f"Unsupported certificate format: {format}")


def save_report(
    report: RunReport,
    output_dir: str | Path,
    formats: list[str] | None = None,
    compare: RunReport | None = None,
    baseline: RunReport | None = None,
    filename_prefix: str = "report",
) -> dict[str, Path]:
    """
    Save RunReport in multiple formats to a directory.

    Args:
        report: RunReport to save
        output_dir: Directory to save reports in
        formats: List of formats to generate ("json", "markdown", "html", "cert")
        compare: Optional comparison report
        baseline: Optional baseline report for certificate generation
        filename_prefix: Prefix for generated filenames

    Returns:
        Dictionary mapping format names to generated file paths
    """
    if formats is None:
        formats = ["json", "markdown", "html"]
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    saved_files = {}

    suffix = "_comparison" if compare else ""

    if "json" in formats:
        json_path = output_path / f"{filename_prefix}{suffix}.json"
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(to_json(report))
        saved_files["json"] = json_path

    if "markdown" in formats:
        md_path = output_path / f"{filename_prefix}{suffix}.md"
        with open(md_path, "w", encoding="utf-8") as f:
            f.write(to_markdown(report, compare))
        saved_files["markdown"] = md_path

    if "html" in formats:
        html_path = output_path / f"{filename_prefix}{suffix}.html"
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(to_html(report, compare))
        saved_files["html"] = html_path

    if "cert" in formats:
        if baseline is None:
            raise ValueError("Baseline report required for certificate generation")

        # Generate certificate JSON in canonical path/name
        cert_json_path = output_path / "evaluation.cert.json"
        with open(cert_json_path, "w", encoding="utf-8") as f:
            f.write(to_certificate(report, baseline, format="json"))
        saved_files["cert"] = cert_json_path

        # Also emit a markdown variant for human consumption
        cert_md_path = output_path / f"{filename_prefix}_certificate.md"
        with open(cert_md_path, "w", encoding="utf-8") as f:
            f.write(to_certificate(report, baseline, format="markdown"))
        saved_files["cert_md"] = cert_md_path

        # Emit a lightweight manifest to serve as an evidence bundle index
        try:
            from datetime import datetime as _dt

            manifest = {
                "generated_at": _dt.now().isoformat(),
                "files": {
                    "certificate_json": str(cert_json_path),
                    "certificate_markdown": str(cert_md_path),
                },
                "summary": {
                    "run_model": (report.get("meta", {}) or {}).get("model_id"),
                    "device": (report.get("meta", {}) or {}).get("device"),
                    "seed": (report.get("meta", {}) or {}).get("seed"),
                },
            }
            # Write debug evidence (tiny) when requested via env
            guard_payload = {}
            try:
                guard_ctx = report.get("guards") or []
            except Exception:
                guard_ctx = []
            if isinstance(guard_ctx, list) and guard_ctx:
                tiny: list[dict] = []
                for g in guard_ctx:
                    if isinstance(g, dict):
                        entry: dict[str, object] = {}
                        pol = g.get("policy") or {}
                        if isinstance(pol, dict):
                            for k in (
                                "deadband",
                                "min_effect_lognll",
                                "max_caps",
                                "sigma_quantile",
                            ):
                                if k in pol:
                                    entry[k] = pol[k]
                        if g.get("name"):
                            entry["name"] = g.get("name")
                        if entry:
                            tiny.append(entry)
                if tiny:
                    guard_payload = {"guards_decisions": tiny}
            # The helper will no-op unless INVARLOCK_EVIDENCE_DEBUG=1
            if guard_payload:
                maybe_dump_guard_evidence(output_path, guard_payload)
            else:
                maybe_dump_guard_evidence(output_path, {"guards_decisions": []})

            ev_file = Path(output_path) / "guards_evidence.json"
            if ev_file.exists():
                manifest["evidence"] = {"guards_evidence": str(ev_file)}

            (output_path / "manifest.json").write_text(
                json.dumps(manifest, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            saved_files["manifest"] = output_path / "manifest.json"
        except Exception:
            # Non-fatal; manifest is best-effort
            pass

    return saved_files


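For orientation, a minimal sketch of how this public `save_report` helper might be wired into a caller. The function name `export_bundle`, the output directory, and the filename prefix are illustrative, and `report`/`baseline` are assumed to be valid RunReport structures produced by an InvarLock run:

    from pathlib import Path

    from invarlock.reporting.report import save_report

    def export_bundle(report, baseline, out_dir="reports/latest"):
        # "cert" requires a baseline; the other formats do not.
        return save_report(
            report,
            Path(out_dir),
            formats=["json", "markdown", "html", "cert"],
            baseline=baseline,
            filename_prefix="run",
        )
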
# ── Private helper functions ──────────────────────────────────────────────


def _validate_baseline_or_report(baseline: RunReport | dict[str, Any]) -> bool:
    """
    Validate that a baseline is either a valid RunReport or a valid baseline format.

    Args:
        baseline: Baseline data to validate

    Returns:
        True if valid, False otherwise
    """
    # First try to validate as a RunReport
    if isinstance(baseline, dict) and validate_report(cast(RunReport, baseline)):
        return True

    # If not a RunReport, check if it's a valid baseline format
    try:
        # Check for baseline schema (v1 only)
        if isinstance(baseline, dict):
            schema_version = baseline.get("schema_version")
            if schema_version in ["baseline-v1"]:
                # Validate required baseline fields
                required_keys = {"meta", "metrics"}
                if all(key in baseline for key in required_keys):
                    # Baseline must include primary_metric with at least a final value
                    metrics = baseline.get("metrics", {})
                    pm = (
                        metrics.get("primary_metric")
                        if isinstance(metrics, dict)
                        else None
                    )
                    if isinstance(pm, dict) and (pm.get("final") is not None):
                        return True
        return False
    except (KeyError, TypeError):
        return False


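To make the accepted non-RunReport baseline shape concrete, here is a hypothetical minimal dict that the checks above would accept; the model id, metric kind, and value are illustrative only:

    baseline = {
        "schema_version": "baseline-v1",
        "meta": {"model_id": "example/model", "device": "cpu"},
        "metrics": {"primary_metric": {"kind": "ppl", "final": 12.5}},
    }
    # "meta" and "metrics" are present and metrics["primary_metric"]["final"]
    # is not None, so _validate_baseline_or_report(baseline) returns True.
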
def _sanitize_for_json(obj: Any) -> Any:
    """Recursively sanitize data for JSON serialization."""
    if isinstance(obj, dict):
        return {k: _sanitize_for_json(v) for k, v in obj.items()}
    elif isinstance(obj, list | tuple):
        return [_sanitize_for_json(item) for item in obj]
    elif isinstance(obj, int | float | str | bool | type(None)):
        return obj
    elif hasattr(obj, "isoformat"):  # datetime
        return obj.isoformat()
    else:
        return str(obj)


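The practical effect of these rules, shown with a throwaway standard-library mirror of the same logic (the helper above is private to this module, so the example re-implements it locally):

    import json
    from datetime import datetime

    def sanitize(obj):
        # Same rules as above: recurse into containers, keep primitives,
        # turn datetimes into ISO strings, fall back to str() for the rest.
        if isinstance(obj, dict):
            return {k: sanitize(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [sanitize(v) for v in obj]
        if isinstance(obj, (int, float, str, bool, type(None))):
            return obj
        if hasattr(obj, "isoformat"):
            return obj.isoformat()
        return str(obj)

    print(json.dumps(sanitize({"ts": datetime(2024, 1, 1), "shape": (2, 3)})))
    # {"ts": "2024-01-01T00:00:00", "shape": [2, 3]}
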
def _generate_single_markdown(report: RunReport) -> list[str]:
    """Generate markdown for a single report."""
    lines = []

    # Executive Summary
    lines.append("## Executive Summary")
    lines.append("")
    lines.append(f"- **Model**: {report['meta']['model_id']}")
    lines.append(f"- **Edit**: {report['edit']['name']}")
    # Primary metric (canonical)
    pm = (
        report.get("metrics", {}).get("primary_metric")
        if isinstance(report.get("metrics"), dict)
        else None
    )
    if isinstance(pm, dict) and pm:
        kind = str(pm.get("kind") or "primary")
        prev = pm.get("preview")
        fin = pm.get("final")
        ratio = pm.get("ratio_vs_baseline")
        parts = [f"- **Primary Metric** ({kind})"]
        if isinstance(prev, int | float):
            parts.append(f"preview={prev:.3f}")
        if isinstance(fin, int | float):
            parts.append(f"final={fin:.3f}")
        if isinstance(ratio, int | float):
            parts.append(f"ratio_vs_baseline={ratio:.3f}")
        lines.append(" — ".join(parts) if len(parts) > 1 else parts[0])
    else:
        # When primary_metric is absent, do not attempt legacy fallbacks
        lines.append("- **Primary Metric**: unavailable")
    lines.append(
        f"- **Parameters Changed**: {report['edit']['deltas']['params_changed']:,}"
    )
    lines.append(
        f"- **Latency**: {report['metrics']['latency_ms_per_tok']:.2f} ms/token"
    )
    lines.append(f"- **Memory**: {report['metrics']['memory_mb_peak']:.1f} MB")
    lines.append("")

    # Model Information
    lines.append("## Model Information")
    lines.append("")
    lines.append(f"- **Model ID**: {report['meta']['model_id']}")
    lines.append(f"- **Adapter**: {report['meta']['adapter']}")
    lines.append(f"- **Device**: {report['meta']['device']}")
    lines.append(f"- **Commit**: {report['meta']['commit'][:8]}...")
    lines.append(f"- **Timestamp**: {report['meta']['ts']}")
    lines.append("")

    # Evaluation Data
    lines.append("## Evaluation Configuration")
    lines.append("")
    lines.append(f"- **Dataset**: {report['data']['dataset']}")
    lines.append(f"- **Split**: {report['data']['split']}")
    lines.append(f"- **Sequence Length**: {report['data']['seq_len']}")
    lines.append(f"- **Preview Samples**: {report['data']['preview_n']}")
    lines.append(f"- **Final Samples**: {report['data']['final_n']}")
    lines.append("")

    # Performance Metrics
    if isinstance(pm, dict) and pm:
        lines.append("## Primary Metric")
        lines.append("")
        lines.append("| Kind | Preview | Final | Ratio vs Baseline |")
        lines.append("|------|---------|-------|-------------------|")
        kind = str(pm.get("kind") or "primary")
        prev = pm.get("preview")
        fin = pm.get("final")
        ratio = pm.get("ratio_vs_baseline")

        def _fmt(x):
            return f"{x:.3f}" if isinstance(x, int | float) else "N/A"

        lines.append(f"| {kind} | {_fmt(prev)} | {_fmt(fin)} | {_fmt(ratio)} |")
        lines.append("")
        # Append system metrics
        lines.append("## System Metrics")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        lines.append(
            f"| Latency (ms/token) | {report['metrics']['latency_ms_per_tok']:.2f} |"
        )
        lines.append(
            f"| Peak Memory (MB) | {report['metrics']['memory_mb_peak']:.1f} |"
        )
        lines.append("")
    else:
        lines.append("## Performance Metrics")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        # Primary metric block is unavailable; show system metrics only
        lines.append(
            f"| Latency (ms/token) | {report['metrics']['latency_ms_per_tok']:.2f} |"
        )
        lines.append(
            f"| Peak Memory (MB) | {report['metrics']['memory_mb_peak']:.1f} |"
        )
        lines.append("")

    # Edit Details
    lines.append("## Edit Details")
    lines.append("")
    lines.append(f"- **Edit Type**: {report['edit']['name']}")
    lines.append(f"- **Plan Digest**: `{report['edit']['plan_digest'][:16]}...`")
    lines.append("")
    lines.append("### Parameter Changes")
    lines.append("")
    deltas = report["edit"]["deltas"]
    lines.append("| Change Type | Count |")
    lines.append("|-------------|-------|")
    lines.append(f"| Parameters Changed | {deltas['params_changed']:,} |")
    lines.append(f"| Layers Modified | {deltas['layers_modified']} |")
    if deltas["sparsity"] is not None:
        lines.append(f"| Overall Sparsity | {deltas['sparsity']:.3f} |")
    lines.append("")

    # Guard Reports
    if report["guards"]:
        lines.append("## Guard Reports")
        lines.append("")
        for guard in report["guards"]:
            lines.append(f"### {guard['name']}")
            lines.append("")

            # Guard metrics
            if guard["metrics"]:
                lines.append("**Metrics:**")
                for metric, value in guard["metrics"].items():
                    lines.append(f"- {metric}: {value}")
                lines.append("")

            # Actions taken
            if guard["actions"]:
                lines.append("**Actions:**")
                for action in guard["actions"]:
                    lines.append(f"- {action}")
                lines.append("")

            # Violations
            if guard["violations"]:
                lines.append("**Violations:**")
                for violation in guard["violations"]:
                    lines.append(f"- ⚠️ {violation}")
                lines.append("")

    # Status Flags
    lines.append("## Status")
    lines.append("")

    # Determine overall status
    guards_passed = all(
        len(guard.get("violations", [])) == 0 for guard in report["guards"]
    )
    has_rollback = report["flags"]["rollback_reason"] is not None
    guard_recovery = report["flags"]["guard_recovered"]

    if has_rollback:
        lines.append(f"- 🔄 **ROLLBACK**: {report['flags']['rollback_reason']}")
        lines.append("- ❌ Pipeline did not complete successfully")
    elif guard_recovery:
        lines.append("- ✅ Guard recovery was triggered")
        lines.append("- ⚠️ Some guards detected issues but were resolved")
    elif guards_passed:
        lines.append("- ✅ **SUCCESS**: Pipeline completed successfully")
        lines.append(f"- 🛡️ All {len(report['guards'])} guards passed validation")
        lines.append("- 📊 Model modifications were approved and finalized")
    else:
        lines.append("- ⚠️ Some guards reported violations")
        lines.append("- 🔍 Review guard reports above for details")

    # Add performance summary based on primary metric
    metrics_map_sum = cast(dict[str, Any], dict(report["metrics"]))
    pm_sum = (
        metrics_map_sum.get("primary_metric")
        if isinstance(metrics_map_sum, dict)
        else None
    )
    ratio_val = None
    if isinstance(pm_sum, dict):
        rv = pm_sum.get("ratio_vs_baseline")
        if isinstance(rv, int | float):
            ratio_val = float(rv)
    params_changed = report["edit"]["deltas"]["params_changed"]

    lines.append("")
    lines.append("### Summary")
    lines.append(f"- **Parameters Modified**: {params_changed:,}")
    if isinstance(ratio_val, float):
        lines.append(f"- **Performance Impact**: PM ratio {ratio_val:.3f}")

    if params_changed > 0 and isinstance(ratio_val, float):
        impact = (
            "significant"
            if ratio_val > 1.1
            else "minimal"
            if ratio_val < 1.05
            else "moderate"
        )
        lines.append(
            f"- **Assessment**: {impact.title()} model changes with {impact} performance impact"
        )

    return lines


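As a concrete illustration of the primary-metric summary line assembled above, here is a standalone snippet; the dict shape follows this module, while the metric kind and numbers are made up:

    pm = {"kind": "nll", "preview": 3.214, "final": 3.198, "ratio_vs_baseline": 1.004}
    parts = [f"- **Primary Metric** ({pm['kind']})"]
    for key in ("preview", "final", "ratio_vs_baseline"):
        value = pm.get(key)
        if isinstance(value, (int, float)):
            parts.append(f"{key}={value:.3f}")
    print(" — ".join(parts))
    # - **Primary Metric** (nll) — preview=3.214 — final=3.198 — ratio_vs_baseline=1.004
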
def _generate_comparison_markdown(report1: RunReport, report2: RunReport) -> list[str]:
    """Generate side-by-side comparison markdown."""
    lines = []

    # Comparison Summary
    lines.append("## Comparison Summary")
    lines.append("")
    lines.append("| Metric | Report 1 | Report 2 | Delta |")
    lines.append("|--------|----------|----------|-------|")

    # Compare primary metric (final) when present in both reports
    pm1 = (
        report1.get("metrics", {}).get("primary_metric")
        if isinstance(report1.get("metrics"), dict)
        else None
    )
    pm2 = (
        report2.get("metrics", {}).get("primary_metric")
        if isinstance(report2.get("metrics"), dict)
        else None
    )
    if isinstance(pm1, dict) and isinstance(pm2, dict):
        k1 = str(pm1.get("kind") or "primary")
        k2 = str(pm2.get("kind") or "primary")
        label = f"Primary Metric ({k1})" if k1 == k2 else "Primary Metric"
        f1 = pm1.get("final")
        f2 = pm2.get("final")
        if isinstance(f1, int | float) and isinstance(f2, int | float):
            delta = f2 - f1
            sym = "📈" if delta > 0 else "📉" if delta < 0 else "➡️"
            lines.append(f"| {label} | {f1:.3f} | {f2:.3f} | {sym} {delta:+.3f} |")
    # If primary metrics are missing, omit the comparison row rather than falling back

    # Latency comparison
    lat1 = report1["metrics"]["latency_ms_per_tok"]
    lat2 = report2["metrics"]["latency_ms_per_tok"]
    lat_delta = lat2 - lat1
    lat_symbol = "📈" if lat_delta > 0 else "📉" if lat_delta < 0 else "➡️"
    lines.append(
        f"| Latency (ms/tok) | {lat1:.2f} | {lat2:.2f} | {lat_symbol} {lat_delta:+.2f} |"
    )

    # Memory comparison
    mem1 = report1["metrics"]["memory_mb_peak"]
    mem2 = report2["metrics"]["memory_mb_peak"]
    mem_delta = mem2 - mem1
    mem_symbol = "📈" if mem_delta > 0 else "📉" if mem_delta < 0 else "➡️"
    lines.append(
        f"| Memory (MB) | {mem1:.1f} | {mem2:.1f} | {mem_symbol} {mem_delta:+.1f} |"
    )

    lines.append("")

    # Side-by-side details
    lines.append("## Detailed Comparison")
    lines.append("")
    lines.append("### Model Information")
    lines.append("")
    lines.append("| Aspect | Report 1 | Report 2 |")
    lines.append("|--------|----------|----------|")
    lines.append(
        f"| Model | {report1['meta']['model_id']} | {report2['meta']['model_id']} |"
    )
    lines.append(f"| Edit | {report1['edit']['name']} | {report2['edit']['name']} |")
    lines.append(
        f"| Device | {report1['meta']['device']} | {report2['meta']['device']} |"
    )
    lines.append("")

    # Parameter changes comparison
    lines.append("### Parameter Changes")
    lines.append("")
    lines.append("| Change Type | Report 1 | Report 2 | Delta |")
    lines.append("|-------------|----------|----------|-------|")

    delta1 = report1["edit"]["deltas"]
    delta2 = report2["edit"]["deltas"]

    for key in ["params_changed", "layers_modified"]:
        val1_obj = delta1.get(key, 0)
        val2_obj = delta2.get(key, 0)
        if isinstance(val1_obj, int | float | str):
            try:
                val1_i = int(val1_obj)
            except Exception:
                val1_i = 0
        else:
            val1_i = 0
        if isinstance(val2_obj, int | float | str):
            try:
                val2_i = int(val2_obj)
            except Exception:
                val2_i = 0
        else:
            val2_i = 0
        diff = val2_i - val1_i
        diff_str = f"{diff:+,}" if key == "params_changed" else f"{diff:+d}"
        lines.append(
            f"| {key.replace('_', ' ').title()} | {val1_i:,} | {val2_i:,} | {diff_str} |"
        )

    lines.append("")

    # Guard comparison
    if report1["guards"] or report2["guards"]:
        lines.append("### Guard Reports")
        lines.append("")

        # Get all guard names
        guard_names1 = {g["name"] for g in report1["guards"]}
        guard_names2 = {g["name"] for g in report2["guards"]}
        all_guards = sorted(guard_names1 | guard_names2)

        for guard_name in all_guards:
            lines.append(f"#### {guard_name}")
            lines.append("")

            guard1 = next(
                (g for g in report1["guards"] if g["name"] == guard_name), None
            )
            guard2 = next(
                (g for g in report2["guards"] if g["name"] == guard_name), None
            )

            status1 = "Present" if guard1 else "Absent"
            status2 = "Present" if guard2 else "Absent"

            lines.append(f"- **Report 1**: {status1}")
            lines.append(f"- **Report 2**: {status2}")

            if guard1 and guard2:
                if guard1["violations"] or guard2["violations"]:
                    lines.append("")
                    lines.append("**Violations:**")
                    lines.append(f"- Report 1: {len(guard1['violations'])} violations")
                    lines.append(f"- Report 2: {len(guard2['violations'])} violations")

            lines.append("")

    return lines


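The delta column in the comparison tables above follows one small formatting rule; a standalone sketch of that rule with illustrative inputs:

    def delta_cell(before: float, after: float, fmt: str = "+.3f") -> str:
        # Arrow reflects the sign of the change, mirroring the logic above.
        delta = after - before
        symbol = "📈" if delta > 0 else "📉" if delta < 0 else "➡️"
        return f"{symbol} {delta:{fmt}}"

    print(delta_cell(3.214, 3.198))  # 📉 -0.016
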
def _generate_single_html(report: RunReport) -> list[str]:
    """Generate HTML for a single report."""
    lines = []

    # Convert markdown to HTML structure
    md_lines = _generate_single_markdown(report)

    # This is a simplified conversion - in a full implementation,
    # you might use a proper markdown-to-HTML converter
    lines.append(" <div class='report-content'>")

    in_table = False
    for line in md_lines:
        line = line.strip()

        if line.startswith("# "):
            lines.append(f" <h1>{html.escape(line[2:])}</h1>")
        elif line.startswith("## "):
            lines.append(f" <h2>{html.escape(line[3:])}</h2>")
        elif line.startswith("### "):
            lines.append(f" <h3>{html.escape(line[4:])}</h3>")
        elif line.startswith("| ") and "|" in line[1:]:
            if not in_table:
                lines.append(" <table class='metrics-table'>")
                in_table = True

            cells = [cell.strip() for cell in line.split("|")[1:-1]]
            if all(cell.startswith("-") for cell in cells):
                continue  # Skip separator row

            if cells[0] in ["Metric", "Change Type", "Aspect"]:
                lines.append(" <thead><tr>")
                for cell in cells:
                    lines.append(f" <th>{html.escape(cell)}</th>")
                lines.append(" </tr></thead><tbody>")
            else:
                lines.append(" <tr>")
                for cell in cells:
                    lines.append(f" <td>{html.escape(cell)}</td>")
                lines.append(" </tr>")
        elif line.startswith("- "):
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
            lines.append(f" <li>{html.escape(line[2:])}</li>")
        elif line == "":
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
        else:
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
            if line:
                lines.append(f" <p>{html.escape(line)}</p>")

    if in_table:
        lines.append(" </tbody></table>")

    lines.append(" </div>")

    return lines


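For reference, how the simplified Markdown-to-HTML pass above splits a table row into cells; the row text is illustrative:

    row = "| Latency (ms/token) | 12.34 |"
    cells = [cell.strip() for cell in row.split("|")[1:-1]]
    print(cells)  # ['Latency (ms/token)', '12.34']
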
def _generate_comparison_html(report1: RunReport, report2: RunReport) -> list[str]:
    """Generate HTML for comparison reports."""
    lines = []

    # Similar to single report but with comparison layout
    md_lines = _generate_comparison_markdown(report1, report2)

    lines.append(" <div class='comparison-content'>")

    # Convert markdown lines to HTML (simplified)
    in_table = False
    for line in md_lines:
        line = line.strip()

        if line.startswith("## "):
            lines.append(f" <h2>{html.escape(line[3:])}</h2>")
        elif line.startswith("### "):
            lines.append(f" <h3>{html.escape(line[4:])}</h3>")
        elif line.startswith("| ") and "|" in line[1:]:
            if not in_table:
                lines.append(" <table class='comparison-table'>")
                in_table = True

            cells = [cell.strip() for cell in line.split("|")[1:-1]]
            if all(cell.startswith("-") for cell in cells):
                continue

            if "Metric" in cells[0] or "Aspect" in cells[0]:
                lines.append(" <thead><tr>")
                for cell in cells:
                    lines.append(f" <th>{html.escape(cell)}</th>")
                lines.append(" </tr></thead><tbody>")
            else:
                lines.append(" <tr>")
                for cell in cells:
                    lines.append(f" <td>{html.escape(cell)}</td>")
                lines.append(" </tr>")
        elif line == "":
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
        else:
            if in_table:
                lines.append(" </tbody></table>")
                in_table = False
            if line:
                lines.append(f" <p>{html.escape(line)}</p>")

    if in_table:
        lines.append(" </tbody></table>")

    lines.append(" </div>")

    return lines


def _get_default_css() -> str:
    """Get default CSS styling for HTML reports."""
    return """ <style>
    body {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
        line-height: 1.6;
        color: #333;
        max-width: 1200px;
        margin: 0 auto;
        padding: 20px;
        background-color: #f8f9fa;
    }
    .container {
        background: white;
        padding: 30px;
        border-radius: 8px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
    h2 { color: #34495e; border-bottom: 1px solid #bdc3c7; padding-bottom: 5px; margin-top: 30px; }
    h3 { color: #7f8c8d; margin-top: 25px; }
    .timestamp { color: #95a5a6; font-style: italic; margin-bottom: 30px; }
    .metrics-table, .comparison-table {
        width: 100%;
        border-collapse: collapse;
        margin: 20px 0;
        background: white;
    }
    .metrics-table th, .metrics-table td,
    .comparison-table th, .comparison-table td {
        padding: 12px;
        text-align: left;
        border-bottom: 1px solid #ecf0f1;
    }
    .metrics-table th, .comparison-table th {
        background-color: #3498db;
        color: white;
        font-weight: 600;
    }
    .metrics-table tr:hover, .comparison-table tr:hover {
        background-color: #f8f9fa;
    }
    li { margin: 5px 0; }
    code {
        background: #f1f2f6;
        padding: 2px 6px;
        border-radius: 3px;
        font-family: 'Monaco', 'Consolas', monospace;
    }
    .comparison-content { display: block; }
    @media (max-width: 768px) {
        .container { padding: 15px; }
        .metrics-table, .comparison-table { font-size: 14px; }
    }
    </style>"""


# Export public API
__all__ = ["to_json", "to_markdown", "to_html", "to_certificate", "save_report"]