haoline-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. haoline/.streamlit/config.toml +10 -0
  2. haoline/__init__.py +248 -0
  3. haoline/analyzer.py +935 -0
  4. haoline/cli.py +2712 -0
  5. haoline/compare.py +811 -0
  6. haoline/compare_visualizations.py +1564 -0
  7. haoline/edge_analysis.py +525 -0
  8. haoline/eval/__init__.py +131 -0
  9. haoline/eval/adapters.py +844 -0
  10. haoline/eval/cli.py +390 -0
  11. haoline/eval/comparison.py +542 -0
  12. haoline/eval/deployment.py +633 -0
  13. haoline/eval/schemas.py +833 -0
  14. haoline/examples/__init__.py +15 -0
  15. haoline/examples/basic_inspection.py +74 -0
  16. haoline/examples/compare_models.py +117 -0
  17. haoline/examples/hardware_estimation.py +78 -0
  18. haoline/format_adapters.py +1001 -0
  19. haoline/formats/__init__.py +123 -0
  20. haoline/formats/coreml.py +250 -0
  21. haoline/formats/gguf.py +483 -0
  22. haoline/formats/openvino.py +255 -0
  23. haoline/formats/safetensors.py +273 -0
  24. haoline/formats/tflite.py +369 -0
  25. haoline/hardware.py +2307 -0
  26. haoline/hierarchical_graph.py +462 -0
  27. haoline/html_export.py +1573 -0
  28. haoline/layer_summary.py +769 -0
  29. haoline/llm_summarizer.py +465 -0
  30. haoline/op_icons.py +618 -0
  31. haoline/operational_profiling.py +1492 -0
  32. haoline/patterns.py +1116 -0
  33. haoline/pdf_generator.py +265 -0
  34. haoline/privacy.py +250 -0
  35. haoline/pydantic_models.py +241 -0
  36. haoline/report.py +1923 -0
  37. haoline/report_sections.py +539 -0
  38. haoline/risks.py +521 -0
  39. haoline/schema.py +523 -0
  40. haoline/streamlit_app.py +2024 -0
  41. haoline/tests/__init__.py +4 -0
  42. haoline/tests/conftest.py +123 -0
  43. haoline/tests/test_analyzer.py +868 -0
  44. haoline/tests/test_compare_visualizations.py +293 -0
  45. haoline/tests/test_edge_analysis.py +243 -0
  46. haoline/tests/test_eval.py +604 -0
  47. haoline/tests/test_format_adapters.py +460 -0
  48. haoline/tests/test_hardware.py +237 -0
  49. haoline/tests/test_hardware_recommender.py +90 -0
  50. haoline/tests/test_hierarchical_graph.py +326 -0
  51. haoline/tests/test_html_export.py +180 -0
  52. haoline/tests/test_layer_summary.py +428 -0
  53. haoline/tests/test_llm_patterns.py +540 -0
  54. haoline/tests/test_llm_summarizer.py +339 -0
  55. haoline/tests/test_patterns.py +774 -0
  56. haoline/tests/test_pytorch.py +327 -0
  57. haoline/tests/test_report.py +383 -0
  58. haoline/tests/test_risks.py +398 -0
  59. haoline/tests/test_schema.py +417 -0
  60. haoline/tests/test_tensorflow.py +380 -0
  61. haoline/tests/test_visualizations.py +316 -0
  62. haoline/universal_ir.py +856 -0
  63. haoline/visualizations.py +1086 -0
  64. haoline/visualize_yolo.py +44 -0
  65. haoline/web.py +110 -0
  66. haoline-0.3.0.dist-info/METADATA +471 -0
  67. haoline-0.3.0.dist-info/RECORD +70 -0
  68. haoline-0.3.0.dist-info/WHEEL +4 -0
  69. haoline-0.3.0.dist-info/entry_points.txt +5 -0
  70. haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/compare_visualizations.py
@@ -0,0 +1,1564 @@
1
+ #!/usr/bin/env python
2
+ # Copyright (c) 2025 HaoLine Contributors
3
+ # SPDX-License-Identifier: MIT
4
+
5
+ """
6
+ Compare Mode Visualizations for Quantization Impact Reports.
7
+
8
+ Story 6.4: Quantization Impact Report (TRT EngineXplorer-inspired)
9
+ ------------------------------------------------------------------
10
+
11
+ This module provides visualization and analysis functions for multi-model
12
+ comparison reports. It generates:
13
+
14
+ - Accuracy vs Speedup tradeoff charts
15
+ - Memory savings analysis
16
+ - Layer-wise precision breakdown
17
+ - Trade-off analysis summaries
18
+ - Calibration recommendations
19
+
20
+ Requires matplotlib for chart generation (optional; falls back gracefully when missing).
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import base64
26
+ import logging
27
+ from collections.abc import Sequence
28
+ from dataclasses import dataclass
29
+ from io import BytesIO
30
+ from pathlib import Path
31
+ from typing import Any
32
+
33
+ LOGGER = logging.getLogger("haoline.compare_viz")
34
+
35
+ # Try to import matplotlib
36
+ try:
37
+ import matplotlib
38
+
39
+ matplotlib.use("Agg") # Non-interactive backend
40
+ import matplotlib.pyplot as plt
41
+ from matplotlib.figure import Figure
42
+
43
+ MATPLOTLIB_AVAILABLE = True
44
+ except ImportError:
45
+ MATPLOTLIB_AVAILABLE = False
46
+ plt = None # type: ignore
47
+ Figure = None # type: ignore
48
+
49
+
50
+ @dataclass
51
+ class TradeoffPoint:
52
+ """A single point on the accuracy vs speedup tradeoff curve."""
53
+
54
+ precision: str
55
+ speedup: float # Relative to baseline (1.0 = same speed)
56
+ accuracy_delta: float # Relative to baseline (negative = worse)
57
+ size_ratio: float # Relative to baseline (< 1.0 = smaller)
58
+ memory_ratio: float # Relative to baseline (< 1.0 = smaller)
59
+
60
+
61
+ @dataclass
62
+ class LayerPrecisionBreakdown:
63
+ """Precision breakdown for a single layer."""
64
+
65
+ layer_name: str
66
+ op_type: str
67
+ precision: str
68
+ param_count: int
69
+ flops: int
70
+ memory_bytes: int
71
+
72
+
73
+ @dataclass
74
+ class CalibrationRecommendation:
75
+ """Recommendation for quantization calibration."""
76
+
77
+ recommendation: str
78
+ reason: str
79
+ severity: str # "info", "warning", "critical"
80
+ affected_layers: list[str]
81
+
82
+
83
+ def is_available() -> bool:
84
+ """Check if visualization is available (matplotlib installed)."""
85
+ return MATPLOTLIB_AVAILABLE
86
+
87
+
88
+ def extract_layer_precision_breakdown(
89
+ variant_report: Any,
90
+ precision: str,
91
+ ) -> list[LayerPrecisionBreakdown]:
92
+ """
93
+ Extract per-layer precision breakdown from an inspection report.
94
+
95
+ Task 6.4.4: Layer-wise precision breakdown
96
+
97
+ Returns a list of LayerPrecisionBreakdown for each layer/op in the model.
98
+ """
99
+ breakdown: list[LayerPrecisionBreakdown] = []
100
+
101
+ # Get layer summary if available
102
+ layer_summary = getattr(variant_report, "layer_summary", None)
103
+ if layer_summary is None:
104
+ return breakdown
105
+
106
+ layers = getattr(layer_summary, "layers", [])
107
+ for layer in layers:
108
+ layer_name = getattr(layer, "name", "unknown")
109
+ op_type = getattr(layer, "op_type", "unknown")
110
+ params = getattr(layer, "param_count", 0)
111
+ flops = getattr(layer, "flops", 0)
112
+ memory = getattr(layer, "memory_bytes", 0)
113
+
114
+ breakdown.append(
115
+ LayerPrecisionBreakdown(
116
+ layer_name=layer_name,
117
+ op_type=op_type,
118
+ precision=precision,
119
+ param_count=params,
120
+ flops=flops,
121
+ memory_bytes=memory,
122
+ )
123
+ )
124
+
125
+ return breakdown
126
+
127
+
128
+ def generate_layer_precision_chart(
129
+ breakdowns: dict[str, list[LayerPrecisionBreakdown]],
130
+ output_path: Path | None = None,
131
+ title: str = "Per-Layer Precision Comparison",
132
+ top_n: int = 20,
133
+ ) -> bytes | None:
134
+ """
135
+ Generate a chart showing per-layer precision breakdown.
136
+
137
+ Task 6.4.4: Layer-wise precision breakdown visualization
138
+
139
+ Args:
140
+ breakdowns: Dict mapping precision to list of layer breakdowns
141
+ output_path: Optional path to save the chart
142
+ title: Chart title
143
+ top_n: Number of top layers to show
144
+
145
+ Returns:
146
+ PNG bytes if successful, None otherwise
147
+ """
148
+ if not MATPLOTLIB_AVAILABLE or not breakdowns:
149
+ return None
150
+
151
+ plt.style.use("dark_background")
152
+
153
+ fig, ax = plt.subplots(figsize=(12, 8))
154
+ fig.patch.set_facecolor("#0a0a0a")
155
+ ax.set_facecolor("#1a1a1a")
156
+
157
+ # Get all precisions and sort layers by FLOPs (from first precision)
158
+ precisions = list(breakdowns.keys())
159
+ if not precisions:
160
+ return None
161
+
162
+ first_breakdown = breakdowns[precisions[0]]
163
+ # Sort by FLOPs descending and take top N
164
+ sorted_layers = sorted(first_breakdown, key=lambda x: x.flops, reverse=True)[:top_n]
165
+ layer_names = [layer.layer_name[:30] for layer in sorted_layers] # Truncate long names
166
+
167
+ # Colors for different precisions
168
+ precision_colors = {
169
+ "fp32": "#4A90D9",
170
+ "fp16": "#30D158",
171
+ "bf16": "#64D2FF",
172
+ "int8": "#FFD60A",
173
+ "int4": "#FF9F0A",
174
+ }
175
+
176
+ x = range(len(layer_names))
177
+ width = 0.8 / len(precisions)
178
+
179
+ for idx, precision in enumerate(precisions):
180
+ layers = breakdowns[precision]
181
+ layer_map = {layer.layer_name: layer for layer in layers}
182
+
183
+ flops_values = []
184
+ for layer in sorted_layers:
185
+ if layer.layer_name in layer_map:
186
+ flops_values.append(layer_map[layer.layer_name].flops / 1e9) # Convert to GFLOPs
187
+ else:
188
+ flops_values.append(0)
189
+
190
+ offset = (idx - len(precisions) / 2 + 0.5) * width
191
+ color = precision_colors.get(precision.lower(), "#BF5AF2")
192
+ ax.barh(
193
+ [i + offset for i in x],
194
+ flops_values,
195
+ width,
196
+ label=precision.upper(),
197
+ color=color,
198
+ alpha=0.8,
199
+ )
200
+
201
+ ax.set_xlabel("FLOPs (G)", fontsize=12, color="white")
202
+ ax.set_ylabel("Layer", fontsize=12, color="white")
203
+ ax.set_title(title, fontsize=14, fontweight="bold", color="white", pad=15)
204
+ ax.set_yticks(list(x))
205
+ ax.set_yticklabels(layer_names, fontsize=8)
206
+ ax.tick_params(colors="white")
207
+
208
+ ax.spines["bottom"].set_color("#636366")
209
+ ax.spines["left"].set_color("#636366")
210
+ ax.spines["top"].set_visible(False)
211
+ ax.spines["right"].set_visible(False)
212
+
213
+ ax.legend(loc="lower right", framealpha=0.3, facecolor="#1a1a1a")
214
+ ax.invert_yaxis() # Highest FLOPs at top
215
+
216
+ plt.tight_layout()
217
+
218
+ buf = BytesIO()
219
+ fig.savefig(buf, format="png", dpi=150, facecolor="#0a0a0a")
220
+ buf.seek(0)
221
+ png_bytes = buf.read()
222
+
223
+ if output_path:
224
+ output_path.parent.mkdir(parents=True, exist_ok=True)
225
+ output_path.write_bytes(png_bytes)
226
+
227
+ plt.close(fig)
228
+ return png_bytes
229
+
230
+
231
+ def compute_tradeoff_points(
232
+ compare_json: dict[str, Any],
233
+ ) -> list[TradeoffPoint]:
234
+ """
235
+ Compute tradeoff points from comparison JSON.
236
+
237
+ Task 6.4.3: Trade-off analysis
238
+
239
+ Returns a list of TradeoffPoint objects for each variant,
240
+ with speedup and accuracy delta relative to baseline.
241
+ """
242
+ variants = compare_json.get("variants", [])
243
+ baseline_precision = compare_json.get("baseline_precision", "fp32")
244
+
245
+ # Find baseline variant
246
+ baseline = None
247
+ for v in variants:
248
+ if v.get("precision") == baseline_precision:
249
+ baseline = v
250
+ break
251
+
252
+ if baseline is None and variants:
253
+ baseline = variants[0]
254
+
255
+ if baseline is None:
256
+ return []
257
+
258
+ # Extract baseline metrics
259
+ baseline_metrics = baseline.get("metrics", {})
260
+ baseline_latency = (
261
+ baseline_metrics.get("latency_ms_p50") or baseline_metrics.get("latency_ms") or 1.0
262
+ )
263
+ baseline_accuracy = baseline_metrics.get("f1_macro") or baseline_metrics.get("accuracy") or 1.0
264
+ baseline_size = baseline.get("size_bytes", 1)
265
+ baseline_memory = baseline.get("memory_bytes") or baseline_size
266
+
267
+ points: list[TradeoffPoint] = []
268
+ for v in variants:
269
+ precision = v.get("precision", "unknown")
270
+ metrics = v.get("metrics", {})
271
+
272
+ latency = metrics.get("latency_ms_p50") or metrics.get("latency_ms") or baseline_latency
273
+ accuracy = metrics.get("f1_macro") or metrics.get("accuracy") or baseline_accuracy
274
+ size = v.get("size_bytes", baseline_size)
275
+ memory = v.get("memory_bytes") or size
276
+
277
+ # Compute ratios
278
+ speedup = baseline_latency / max(latency, 0.001)
279
+ accuracy_delta = accuracy - baseline_accuracy
280
+ size_ratio = size / max(baseline_size, 1)
281
+ memory_ratio = memory / max(baseline_memory, 1)
282
+
283
+ points.append(
284
+ TradeoffPoint(
285
+ precision=precision,
286
+ speedup=speedup,
287
+ accuracy_delta=accuracy_delta,
288
+ size_ratio=size_ratio,
289
+ memory_ratio=memory_ratio,
290
+ )
291
+ )
292
+
293
+ return points
294
+
295
+
296
+ def generate_tradeoff_chart(
297
+ points: Sequence[TradeoffPoint],
298
+ output_path: Path | None = None,
299
+ title: str = "Accuracy vs Speedup Tradeoff",
300
+ ) -> bytes | None:
301
+ """
302
+ Generate accuracy vs speedup tradeoff chart.
303
+
304
+ Task 6.4.5: Show accuracy vs speedup tradeoff chart
305
+
306
+ Args:
307
+ points: List of TradeoffPoint objects
308
+ output_path: Optional path to save the chart
309
+ title: Chart title
310
+
311
+ Returns:
312
+ PNG bytes if successful, None otherwise
313
+ """
314
+ if not MATPLOTLIB_AVAILABLE or not points:
315
+ return None
316
+
317
+ # Chart styling (dark theme matching existing visualizations)
318
+ plt.style.use("dark_background")
319
+
320
+ fig, ax = plt.subplots(figsize=(10, 7))
321
+ fig.patch.set_facecolor("#0a0a0a")
322
+ ax.set_facecolor("#1a1a1a")
323
+
324
+ # Colors for different precisions
325
+ precision_colors = {
326
+ "fp32": "#4A90D9",
327
+ "fp16": "#30D158",
328
+ "bf16": "#64D2FF",
329
+ "int8": "#FFD60A",
330
+ "int4": "#FF9F0A",
331
+ "unknown": "#636366",
332
+ }
333
+
334
+ # Plot each point
335
+ for p in points:
336
+ color = precision_colors.get(p.precision.lower(), "#BF5AF2")
337
+
338
+ # Size based on memory reduction (smaller = bigger marker)
339
+ marker_size = max(100, 400 * (1 - p.memory_ratio + 0.5))
340
+
341
+ ax.scatter(
342
+ p.speedup,
343
+ p.accuracy_delta * 100, # Convert to percentage
344
+ s=marker_size,
345
+ c=color,
346
+ label=p.precision.upper(),
347
+ alpha=0.8,
348
+ edgecolors="white",
349
+ linewidths=1.5,
350
+ zorder=5,
351
+ )
352
+
353
+ # Annotate with precision label
354
+ ax.annotate(
355
+ p.precision.upper(),
356
+ (p.speedup, p.accuracy_delta * 100),
357
+ textcoords="offset points",
358
+ xytext=(0, 10),
359
+ ha="center",
360
+ fontsize=10,
361
+ fontweight="bold",
362
+ color="white",
363
+ )
364
+
365
+ # Reference lines
366
+ ax.axhline(y=0, color="#636366", linestyle="--", alpha=0.5, label="Baseline accuracy")
367
+ ax.axvline(x=1.0, color="#636366", linestyle="--", alpha=0.5, label="Baseline speed")
368
+
369
+ # Styling
370
+ ax.set_xlabel("Speedup (×)", fontsize=12, color="white") # noqa: RUF001
371
+ ax.set_ylabel("Accuracy Change (%)", fontsize=12, color="white")
372
+ ax.set_title(title, fontsize=14, fontweight="bold", color="white", pad=15)
373
+
374
+ ax.tick_params(colors="white")
375
+ ax.spines["bottom"].set_color("#636366")
376
+ ax.spines["left"].set_color("#636366")
377
+ ax.spines["top"].set_visible(False)
378
+ ax.spines["right"].set_visible(False)
379
+
380
+ ax.grid(True, alpha=0.2, color="#636366")
381
+
382
+ # Add quadrant labels
383
+ ax.text(
384
+ 0.02,
385
+ 0.98,
386
+ "Slower + Better",
387
+ transform=ax.transAxes,
388
+ ha="right",
389
+ va="top",
390
+ fontsize=9,
391
+ color="#636366",
392
+ alpha=0.7,
393
+ )
394
+ ax.text(
395
+ 0.98,
396
+ 0.98,
397
+ "Faster + Better ✓",
398
+ transform=ax.transAxes,
399
+ ha="left",
400
+ va="top",
401
+ fontsize=9,
402
+ color="#30D158",
403
+ alpha=0.9,
404
+ )
405
+ ax.text(
406
+ 0.02,
407
+ 0.02,
408
+ "Slower + Worse ✗",
409
+ transform=ax.transAxes,
410
+ ha="right",
411
+ va="bottom",
412
+ fontsize=9,
413
+ color="#FF453A",
414
+ alpha=0.9,
415
+ )
416
+ ax.text(
417
+ 0.98,
418
+ 0.02,
419
+ "Faster + Worse",
420
+ transform=ax.transAxes,
421
+ ha="left",
422
+ va="bottom",
423
+ fontsize=9,
424
+ color="#FFD60A",
425
+ alpha=0.7,
426
+ )
427
+
428
+ # Legend
429
+ handles, labels = ax.get_legend_handles_labels()
430
+ # Remove duplicate labels
431
+ by_label = dict(zip(labels, handles, strict=False))
432
+ ax.legend(
433
+ by_label.values(),
434
+ by_label.keys(),
435
+ loc="upper right",
436
+ framealpha=0.3,
437
+ facecolor="#1a1a1a",
438
+ edgecolor="#636366",
439
+ )
440
+
441
+ plt.tight_layout()
442
+
443
+ # Save to bytes
444
+ buf = BytesIO()
445
+ fig.savefig(buf, format="png", dpi=150, facecolor="#0a0a0a")
446
+ buf.seek(0)
447
+ png_bytes = buf.read()
448
+
449
+ if output_path:
450
+ output_path.parent.mkdir(parents=True, exist_ok=True)
451
+ output_path.write_bytes(png_bytes)
452
+
453
+ plt.close(fig)
454
+ return png_bytes
455
+
456
+
457
+ def generate_memory_savings_chart(
458
+ compare_json: dict[str, Any],
459
+ output_path: Path | None = None,
460
+ title: str = "Memory & Size Reduction",
461
+ ) -> bytes | None:
462
+ """
463
+ Generate memory savings comparison chart.
464
+
465
+ Task 6.4.6: Display memory savings per layer analysis
466
+
467
+ Shows size and memory reduction for each variant relative to baseline.
468
+ """
469
+ if not MATPLOTLIB_AVAILABLE:
470
+ return None
471
+
472
+ variants = compare_json.get("variants", [])
473
+ if not variants:
474
+ return None
475
+
476
+ plt.style.use("dark_background")
477
+
478
+ fig, ax = plt.subplots(figsize=(10, 6))
479
+ fig.patch.set_facecolor("#0a0a0a")
480
+ ax.set_facecolor("#1a1a1a")
481
+
482
+ # Find baseline
483
+ baseline_precision = compare_json.get("baseline_precision", "fp32")
484
+ baseline_size = 1
485
+ baseline_memory = 1
486
+ for v in variants:
487
+ if v.get("precision") == baseline_precision:
488
+ baseline_size = v.get("size_bytes", 1)
489
+ baseline_memory = v.get("memory_bytes") or baseline_size
490
+ break
491
+
492
+ # Prepare data - skip baseline since it shows 0% reduction
493
+ precisions = []
494
+ size_reductions = []
495
+ memory_reductions = []
496
+
497
+ for v in variants:
498
+ precision = v.get("precision", "unknown")
499
+ # Skip baseline - it always shows 0% reduction
500
+ if precision == baseline_precision:
501
+ continue
502
+
503
+ size = v.get("size_bytes", baseline_size)
504
+ memory = v.get("memory_bytes") or size
505
+
506
+ precisions.append(f"{precision.upper()} vs {baseline_precision.upper()}")
507
+ size_reductions.append((1 - size / baseline_size) * 100)
508
+ memory_reductions.append((1 - memory / baseline_memory) * 100)
509
+
510
+ if not precisions:
511
+ return None # Nothing to compare if only baseline
512
+
513
+ x = range(len(precisions))
514
+ width = 0.35
515
+
516
+ bars1 = ax.bar(
517
+ [i - width / 2 for i in x],
518
+ size_reductions,
519
+ width,
520
+ label="File Size",
521
+ color="#4A90D9",
522
+ alpha=0.8,
523
+ )
524
+ bars2 = ax.bar(
525
+ [i + width / 2 for i in x],
526
+ memory_reductions,
527
+ width,
528
+ label="Memory",
529
+ color="#30D158",
530
+ alpha=0.8,
531
+ )
532
+
533
+ # Add value labels
534
+ for bar in bars1:
535
+ height = bar.get_height()
536
+ ax.annotate(
537
+ f"{height:.1f}%",
538
+ xy=(bar.get_x() + bar.get_width() / 2, height),
539
+ xytext=(0, 3),
540
+ textcoords="offset points",
541
+ ha="center",
542
+ va="bottom",
543
+ fontsize=9,
544
+ color="white",
545
+ )
546
+ for bar in bars2:
547
+ height = bar.get_height()
548
+ ax.annotate(
549
+ f"{height:.1f}%",
550
+ xy=(bar.get_x() + bar.get_width() / 2, height),
551
+ xytext=(0, 3),
552
+ textcoords="offset points",
553
+ ha="center",
554
+ va="bottom",
555
+ fontsize=9,
556
+ color="white",
557
+ )
558
+
559
+ ax.set_ylabel("Reduction (%)", fontsize=12, color="white")
560
+ ax.set_title(title, fontsize=14, fontweight="bold", color="white", pad=15)
561
+ ax.set_xticks(list(x))
562
+ ax.set_xticklabels(precisions)
563
+ ax.tick_params(colors="white")
564
+
565
+ ax.axhline(y=0, color="#636366", linestyle="-", alpha=0.5)
566
+
567
+ ax.spines["bottom"].set_color("#636366")
568
+ ax.spines["left"].set_color("#636366")
569
+ ax.spines["top"].set_visible(False)
570
+ ax.spines["right"].set_visible(False)
571
+
572
+ ax.legend(loc="upper right", framealpha=0.3, facecolor="#1a1a1a")
573
+
574
+ plt.tight_layout()
575
+
576
+ buf = BytesIO()
577
+ fig.savefig(buf, format="png", dpi=150, facecolor="#0a0a0a")
578
+ buf.seek(0)
579
+ png_bytes = buf.read()
580
+
581
+ if output_path:
582
+ output_path.parent.mkdir(parents=True, exist_ok=True)
583
+ output_path.write_bytes(png_bytes)
584
+
585
+ plt.close(fig)
586
+ return png_bytes
587
+
588
+
589
+ def analyze_tradeoffs(
590
+ compare_json: dict[str, Any],
591
+ ) -> dict[str, Any]:
592
+ """
593
+ Analyze tradeoffs between variants and generate recommendations.
594
+
595
+ Task 6.4.3: Add trade-off analysis section
596
+
597
+ Returns a dict with:
598
+ - best_balanced: variant with best speedup-accuracy balance
599
+ - best_speed: variant with best speedup
600
+ - best_accuracy: variant with best accuracy
601
+ - recommendations: list of textual recommendations
602
+ """
603
+ points = compute_tradeoff_points(compare_json)
604
+ if not points:
605
+ return {"recommendations": ["No variants to analyze"]}
606
+
607
+ analysis: dict[str, Any] = {}
608
+
609
+ # Find best variants
610
+ best_balanced_score = float("-inf")
611
+ best_balanced = None
612
+
613
+ for p in points:
614
+ # Score: speedup bonus minus accuracy penalty (weighted)
615
+ # Higher speedup is good, negative accuracy delta is bad
616
+ score = p.speedup - 1.0 + (p.accuracy_delta * 10) # 10x weight on accuracy
617
+ if score > best_balanced_score:
618
+ best_balanced_score = score
619
+ best_balanced = p
620
+
621
+ best_speed = max(points, key=lambda p: p.speedup)
622
+ best_accuracy = max(points, key=lambda p: p.accuracy_delta)
623
+ smallest = min(points, key=lambda p: p.size_ratio)
624
+
625
+ analysis["best_balanced"] = best_balanced.precision if best_balanced else None
626
+ analysis["best_speed"] = best_speed.precision
627
+ analysis["best_accuracy"] = best_accuracy.precision
628
+ analysis["smallest"] = smallest.precision
629
+
630
+ # Generate recommendations
631
+ recommendations: list[str] = []
632
+
633
+ # Check for sweet spot
634
+ for p in points:
635
+ if p.speedup > 1.3 and p.accuracy_delta > -0.01:
636
+ recommendations.append(
637
+ f"**{p.precision.upper()}** offers {p.speedup:.1f}x speedup with "
638
+ f"minimal accuracy loss ({p.accuracy_delta * 100:.2f}%) - recommended."
639
+ )
640
+
641
+ # Warn about significant accuracy drops
642
+ for p in points:
643
+ if p.accuracy_delta < -0.05:
644
+ recommendations.append(
645
+ f"**{p.precision.upper()}** has significant accuracy drop "
646
+ f"({p.accuracy_delta * 100:.2f}%) — validate on your data before use."
647
+ )
648
+
649
+ # Memory savings
650
+ for p in points:
651
+ if p.memory_ratio < 0.6:
652
+ savings = (1 - p.memory_ratio) * 100
653
+ recommendations.append(
654
+ f"**{p.precision.upper()}** saves {savings:.0f}% memory — "
655
+ "ideal for edge/mobile deployment."
656
+ )
657
+
658
+ if not recommendations:
659
+ recommendations.append(
660
+ "All variants show similar trade-offs. Consider your "
661
+ "deployment constraints (latency, memory, accuracy) to choose."
662
+ )
663
+
664
+ analysis["recommendations"] = recommendations
665
+ analysis["tradeoff_points"] = [
666
+ {
667
+ "precision": p.precision,
668
+ "speedup": round(p.speedup, 3),
669
+ "accuracy_delta": round(p.accuracy_delta, 5),
670
+ "size_ratio": round(p.size_ratio, 3),
671
+ "memory_ratio": round(p.memory_ratio, 3),
672
+ }
673
+ for p in points
674
+ ]
675
+
676
+ return analysis
677
+
678
+
679
+ @dataclass
680
+ class NormalizedMetrics:
681
+ """Normalized efficiency metrics for a model variant."""
682
+
683
+ precision: str
684
+ flops_per_param: float # Compute intensity
685
+ memory_per_param: float # Memory efficiency
686
+ params_per_mb: float # Storage efficiency
687
+ efficiency_score: float # Composite efficiency (0-100)
688
+
689
+
690
+ def compute_normalized_metrics(
691
+ compare_json: dict[str, Any],
692
+ ) -> list[NormalizedMetrics]:
693
+ """
694
+ Compute normalized metrics for comparison.
695
+
696
+ Task 6.10.2: Implement normalized metrics (FLOPs/param, memory/param, etc.)
697
+
698
+ Returns normalized efficiency metrics for each variant.
699
+ """
700
+ variants = compare_json.get("variants", [])
701
+ metrics_list: list[NormalizedMetrics] = []
702
+
703
+ for v in variants:
704
+ precision = v.get("precision", "unknown")
705
+ size_bytes = v.get("size_bytes", 0)
706
+ report = v.get("report", {})
707
+
708
+ # Extract raw metrics
709
+ param_counts = report.get("param_counts", {})
710
+ flop_counts = report.get("flop_counts", {})
711
+ memory_est = report.get("memory_estimates", {})
712
+
713
+ total_params = param_counts.get("total", 0) or 1 # Avoid div by zero
714
+ total_flops = flop_counts.get("total", 0)
715
+ peak_memory = memory_est.get("peak_activation_bytes", 0)
716
+
717
+ # Compute normalized metrics
718
+ flops_per_param = total_flops / total_params if total_params > 0 else 0
719
+ memory_per_param = peak_memory / total_params if total_params > 0 else 0
720
+ size_mb = size_bytes / (1024 * 1024) if size_bytes > 0 else 1
721
+ params_per_mb = total_params / size_mb
722
+
723
+ # Composite efficiency score (higher = more efficient)
724
+ # Favors: higher FLOPs/param (more compute per weight), lower memory/param
725
+ efficiency_score = min(100, max(0, (flops_per_param / 100) - (memory_per_param / 1000)))
726
+
727
+ metrics_list.append(
728
+ NormalizedMetrics(
729
+ precision=precision,
730
+ flops_per_param=flops_per_param,
731
+ memory_per_param=memory_per_param,
732
+ params_per_mb=params_per_mb,
733
+ efficiency_score=efficiency_score,
734
+ )
735
+ )
736
+
737
+ return metrics_list
738
+
739
+
740
+ def generate_radar_chart(
741
+ compare_json: dict[str, Any],
742
+ output_path: Path | None = None,
743
+ ) -> bytes | None:
744
+ """
745
+ Generate radar chart comparing key metrics across model variants.
746
+
747
+ Task 6.10.3: Add radar chart comparing key metrics across models
748
+
749
+ Compares: Size, Params, FLOPs, Memory, Latency (if available)
750
+ Each axis shows the metric relative to the baseline (capped at 2x); lower is better.
751
+ """
752
+ if not MATPLOTLIB_AVAILABLE:
753
+ LOGGER.warning("matplotlib not available, skipping radar chart")
754
+ return None
755
+
756
+ import numpy as np
757
+
758
+ variants = compare_json.get("variants", [])
759
+ if not variants:
760
+ return None
761
+
762
+ baseline_precision = compare_json.get("baseline_precision", "fp32")
763
+
764
+ # Find baseline for normalization
765
+ baseline = None
766
+ for v in variants:
767
+ if v.get("precision") == baseline_precision:
768
+ baseline = v
769
+ break
770
+ if baseline is None:
771
+ baseline = variants[0]
772
+
773
+ # Extract baseline values for normalization
774
+ baseline_size = baseline.get("size_bytes", 1) or 1
775
+ baseline_report = baseline.get("report", {})
776
+ baseline_params = baseline_report.get("param_counts", {}).get("total", 1) or 1
777
+ baseline_flops = baseline_report.get("flop_counts", {}).get("total", 1) or 1
778
+ baseline_memory = (
779
+ baseline_report.get("memory_estimates", {}).get("peak_activation_bytes", 1) or 1
780
+ )
781
+ baseline_metrics = baseline.get("metrics", {})
782
+ baseline_latency = (
783
+ baseline_metrics.get("latency_ms_p50") or baseline_metrics.get("latency_ms") or 1
784
+ )
785
+
786
+ # Define radar categories (lower is better for all)
787
+ categories = ["Size", "Params", "FLOPs", "Memory", "Latency"]
788
+ num_vars = len(categories)
789
+
790
+ # Compute angles for radar
791
+ angles = [n / float(num_vars) * 2 * np.pi for n in range(num_vars)]
792
+ angles += angles[:1] # Complete the loop
793
+
794
+ # Create figure
795
+ fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={"polar": True})
796
+ fig.patch.set_facecolor("#1a1a2e")
797
+ ax.set_facecolor("#16213e")
798
+
799
+ # Colors for each variant
800
+ colors = ["#4361ee", "#f72585", "#4cc9f0", "#7209b7", "#3a0ca3"]
801
+
802
+ for idx, v in enumerate(variants):
803
+ precision = v.get("precision", "unknown")
804
+ size = v.get("size_bytes", baseline_size)
805
+ report = v.get("report", {})
806
+ params = report.get("param_counts", {}).get("total", baseline_params) or baseline_params
807
+ flops = report.get("flop_counts", {}).get("total", baseline_flops) or baseline_flops
808
+ memory = (
809
+ report.get("memory_estimates", {}).get("peak_activation_bytes", baseline_memory)
810
+ or baseline_memory
811
+ )
812
+ metrics = v.get("metrics", {})
813
+ latency = metrics.get("latency_ms_p50") or metrics.get("latency_ms") or baseline_latency
814
+
815
+ # Normalize (0-1 scale, relative to baseline, capped at 2x)
816
+ values = [
817
+ min(2.0, size / baseline_size),
818
+ min(2.0, params / baseline_params),
819
+ min(2.0, flops / baseline_flops),
820
+ min(2.0, memory / baseline_memory),
821
+ min(2.0, latency / baseline_latency),
822
+ ]
823
+ values += values[:1] # Complete the loop
824
+
825
+ color = colors[idx % len(colors)]
826
+ ax.plot(angles, values, "o-", linewidth=2, label=precision, color=color)
827
+ ax.fill(angles, values, alpha=0.25, color=color)
828
+
829
+ # Styling
830
+ ax.set_xticks(angles[:-1])
831
+ ax.set_xticklabels(categories, color="#e0e0e0", size=10)
832
+ ax.tick_params(colors="#e0e0e0")
833
+
834
+ # Set radial limits
835
+ ax.set_ylim(0, 2.0)
836
+ ax.set_yticks([0.5, 1.0, 1.5, 2.0])
837
+ ax.set_yticklabels(["0.5x", "1x", "1.5x", "2x"], color="#a0a0a0", size=8)
838
+
839
+ # Grid styling
840
+ ax.grid(True, color="#404060", linestyle="-", linewidth=0.5)
841
+
842
+ # Legend
843
+ legend = ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
844
+ legend.get_frame().set_facecolor("#16213e")
845
+ legend.get_frame().set_edgecolor("#404060")
846
+ for text in legend.get_texts():
847
+ text.set_color("#e0e0e0")
848
+
849
+ ax.set_title(
850
+ "Model Comparison (lower = better)",
851
+ color="#e0e0e0",
852
+ size=14,
853
+ fontweight="bold",
854
+ pad=20,
855
+ )
856
+
857
+ # Save to bytes
858
+ buf = BytesIO()
859
+ fig.savefig(buf, format="png", dpi=150, facecolor="#1a1a2e", bbox_inches="tight")
860
+ buf.seek(0)
861
+ chart_bytes = buf.getvalue()
862
+ plt.close(fig)
863
+
864
+ if output_path:
865
+ output_path.parent.mkdir(parents=True, exist_ok=True)
866
+ output_path.write_bytes(chart_bytes)
867
+ LOGGER.info(f"Radar chart saved to {output_path}")
868
+
869
+ return chart_bytes
870
+
871
+
872
+ def generate_compare_pdf(
873
+ compare_json: dict[str, Any],
874
+ output_path: Path,
875
+ include_charts: bool = True,
876
+ ) -> Path | None:
877
+ """
878
+ Generate PDF report for model comparison.
879
+
880
+ Task 6.10.9: Generate comparison PDF report
881
+
882
+ Uses the existing PDF generator infrastructure if available.
883
+ """
884
+ try:
885
+ from .pdf_generator import PDFGenerator
886
+ from .pdf_generator import is_available as is_pdf_available
887
+ except ImportError:
888
+ LOGGER.warning("PDF generator not available")
889
+ return None
890
+
891
+ if not is_pdf_available():
892
+ LOGGER.warning("PDF generation not available (Playwright not installed)")
893
+ return None
894
+
895
+ # First generate HTML content
896
+ html_content = generate_compare_html(
897
+ compare_json,
898
+ include_charts=include_charts,
899
+ )
900
+
901
+ # Use PDF generator to convert HTML to PDF
902
+ generator = PDFGenerator()
903
+ try:
904
+ generator.generate_from_html(
905
+ html_content=html_content,
906
+ output_path=output_path,
907
+ )
908
+ LOGGER.info(f"Comparison PDF saved to {output_path}")
909
+ return output_path
910
+ except Exception as e:
911
+ LOGGER.error(f"Failed to generate PDF: {e}")
912
+ return None
913
+
914
+
915
+ def generate_calibration_recommendations(
916
+ compare_json: dict[str, Any],
917
+ ) -> list[CalibrationRecommendation]:
918
+ """
919
+ Generate quantization calibration recommendations.
920
+
921
+ Task 6.4.8: Show quantization calibration recommendations
922
+
923
+ Provides guidance on improving quantization quality based on
924
+ observed accuracy/performance gaps.
925
+ """
926
+ recommendations: list[CalibrationRecommendation] = []
927
+ points = compute_tradeoff_points(compare_json)
928
+
929
+ # Check for INT8 models - warn about GPU inference limitations
930
+ has_int8 = any("int8" in p.precision.lower() for p in points)
931
+ if has_int8:
932
+ recommendations.append(
933
+ CalibrationRecommendation(
934
+ recommendation="Use TensorRT EP for INT8 GPU inference",
935
+ reason="ONNX Runtime's CUDA EP lacks optimized INT8 kernels, falling back to CPU",
936
+ severity="warning",
937
+ affected_layers=["ConvInteger", "MatMulInteger"],
938
+ )
939
+ )
940
+ recommendations.append(
941
+ CalibrationRecommendation(
942
+ recommendation="Alternative: Export to TensorRT engine for native INT8 GPU",
943
+ reason="TensorRT has full INT8 GPU kernel support with tensor cores",
944
+ severity="info",
945
+ affected_layers=["all"],
946
+ )
947
+ )
948
+
949
+ # Check for significant int8 accuracy drop
950
+ for p in points:
951
+ if "int8" in p.precision.lower() and p.accuracy_delta < -0.02:
952
+ recommendations.append(
953
+ CalibrationRecommendation(
954
+ recommendation="Consider increasing calibration dataset size",
955
+ reason=f"INT8 shows {abs(p.accuracy_delta) * 100:.1f}% accuracy drop",
956
+ severity="warning",
957
+ affected_layers=["all"],
958
+ )
959
+ )
960
+ recommendations.append(
961
+ CalibrationRecommendation(
962
+ recommendation="Try percentile calibration (99.99%) instead of minmax",
963
+ reason="Percentile calibration is more robust to outliers",
964
+ severity="info",
965
+ affected_layers=["all"],
966
+ )
967
+ )
968
+
969
+ # Check for int4 issues
970
+ for p in points:
971
+ if "int4" in p.precision.lower() and p.accuracy_delta < -0.05:
972
+ recommendations.append(
973
+ CalibrationRecommendation(
974
+ recommendation="Consider mixed-precision: keep attention in INT8/FP16",
975
+ reason=f"INT4 shows significant accuracy drop ({abs(p.accuracy_delta) * 100:.1f}%)",
976
+ severity="critical",
977
+ affected_layers=["attention", "qkv_proj"],
978
+ )
979
+ )
980
+ recommendations.append(
981
+ CalibrationRecommendation(
982
+ recommendation="Use GPTQ or AWQ for better INT4 accuracy",
983
+ reason="Advanced quantization methods preserve accuracy better",
984
+ severity="info",
985
+ affected_layers=["all"],
986
+ )
987
+ )
988
+
989
+ # General recommendations
990
+ if not recommendations:
991
+ recommendations.append(
992
+ CalibrationRecommendation(
993
+ recommendation="Quantization quality looks good",
994
+ reason="No significant accuracy drops detected",
995
+ severity="info",
996
+ affected_layers=[],
997
+ )
998
+ )
999
+
1000
+ return recommendations
1001
+
1002
+
1003
+ def build_enhanced_markdown(
1004
+ compare_json: dict[str, Any],
1005
+ include_charts: bool = True,
1006
+ assets_dir: Path | None = None,
1007
+ ) -> str:
1008
+ """
1009
+ Build enhanced Markdown report with trade-off analysis.
1010
+
1011
+ Task 6.4.2: Create comparison Markdown table (enhanced)
1012
+ Task 6.4.3: Add trade-off analysis section
1013
+
1014
+ If include_charts is True and matplotlib is available, generates
1015
+ charts and embeds them in the Markdown.
1016
+ """
1017
+ lines: list[str] = []
1018
+
1019
+ model_family_id = compare_json.get("model_family_id", "unknown_model")
1020
+ baseline_precision = compare_json.get("baseline_precision", "unknown")
1021
+
1022
+ lines.append(f"# Quantization Impact Report: {model_family_id}")
1023
+ lines.append("")
1024
+ lines.append(f"**Baseline**: {baseline_precision.upper()}")
1025
+ lines.append("")
1026
+
1027
+ # Architecture compatibility
1028
+ if not compare_json.get("architecture_compatible", True):
1029
+ lines.append("## ⚠️ Compatibility Warnings")
1030
+ lines.append("")
1031
+ for warn in compare_json.get("compatibility_warnings", []):
1032
+ lines.append(f"- {warn}")
1033
+ lines.append("")
1034
+
1035
+ # Trade-off analysis
1036
+ analysis = analyze_tradeoffs(compare_json)
1037
+ lines.append("## Trade-off Analysis")
1038
+ lines.append("")
1039
+
1040
+ if analysis.get("best_balanced"):
1041
+ lines.append(f"- **Best Balance**: {analysis['best_balanced'].upper()}")
1042
+ if analysis.get("best_speed"):
1043
+ lines.append(f"- **Fastest**: {analysis['best_speed'].upper()}")
1044
+ if analysis.get("smallest"):
1045
+ lines.append(f"- **Smallest**: {analysis['smallest'].upper()}")
1046
+ lines.append("")
1047
+
1048
+ # Recommendations
1049
+ lines.append("### Recommendations")
1050
+ lines.append("")
1051
+ for rec in analysis.get("recommendations", []):
1052
+ lines.append(f"- {rec}")
1053
+ lines.append("")
1054
+
1055
+ # Tradeoff chart
1056
+ if include_charts and MATPLOTLIB_AVAILABLE:
1057
+ points = compute_tradeoff_points(compare_json)
1058
+ if points and assets_dir:
1059
+ chart_path = assets_dir / "tradeoff_chart.png"
1060
+ png_bytes = generate_tradeoff_chart(points, chart_path)
1061
+ if png_bytes:
1062
+ lines.append("### Accuracy vs Speedup")
1063
+ lines.append("")
1064
+ lines.append(f"![Tradeoff Chart]({chart_path.name})")
1065
+ lines.append("")
1066
+
1067
+ # Memory chart
1068
+ mem_chart_path = assets_dir / "memory_savings.png"
1069
+ mem_bytes = generate_memory_savings_chart(compare_json, mem_chart_path)
1070
+ if mem_bytes:
1071
+ lines.append("### Memory Savings")
1072
+ lines.append("")
1073
+ lines.append(f"![Memory Savings]({mem_chart_path.name})")
1074
+ lines.append("")
1075
+
1076
+ # Variant comparison table
1077
+ lines.append("## Variant Comparison")
1078
+ lines.append("")
1079
+ lines.append("| Precision | Size | Params | FLOPs | Speedup | Δ Accuracy |")
1080
+ lines.append("|-----------|------|--------|-------|---------|------------|")
1081
+
1082
+ tradeoff_map = {p["precision"]: p for p in analysis.get("tradeoff_points", [])}
1083
+
1084
+ for v in compare_json.get("variants", []):
1085
+ precision = v.get("precision", "unknown")
1086
+ size_bytes = v.get("size_bytes", 0)
1087
+ total_params = v.get("total_params")
1088
+ total_flops = v.get("total_flops")
1089
+
1090
+ tp = tradeoff_map.get(precision, {})
1091
+ speedup = tp.get("speedup", 1.0)
1092
+ acc_delta = tp.get("accuracy_delta", 0.0)
1093
+
1094
+ size_str = _format_bytes(size_bytes)
1095
+ params_str = _format_number(total_params) if total_params else "-"
1096
+ flops_str = _format_number(total_flops) if total_flops else "-"
1097
+ speedup_str = f"{speedup:.2f}x"
1098
+ acc_str = f"{acc_delta * 100:+.2f}%" if acc_delta != 0 else "baseline"
1099
+
1100
+ lines.append(
1101
+ f"| {precision.upper()} | {size_str} | {params_str} | {flops_str} | "
1102
+ f"{speedup_str} | {acc_str} |"
1103
+ )
1104
+
1105
+ lines.append("")
1106
+
1107
+ # Calibration recommendations
1108
+ calib_recs = generate_calibration_recommendations(compare_json)
1109
+ if calib_recs and any(
1110
+ r.severity != "info" or "good" not in r.recommendation for r in calib_recs
1111
+ ):
1112
+ lines.append("## Calibration Recommendations")
1113
+ lines.append("")
1114
+ for rec in calib_recs:
1115
+ icon = {"info": "i", "warning": "!", "critical": "!!!"}.get(rec.severity, "*")
1116
+ lines.append(f"- {icon} **{rec.recommendation}**")
1117
+ lines.append(f" - {rec.reason}")
1118
+ lines.append("")
1119
+
1120
+ lines.append("---")
1121
+ lines.append("*Generated by HaoLine Compare Mode*")
1122
+ lines.append("")
1123
+
1124
+ return "\n".join(lines)
1125
+
1126
+
1127
+ def _format_number(n: float | None) -> str:
1128
+ """Format large numbers with K/M/G suffixes."""
1129
+ if n is None:
1130
+ return "-"
1131
+ if abs(n) >= 1e9:
1132
+ return f"{n / 1e9:.2f}G"
1133
+ if abs(n) >= 1e6:
1134
+ return f"{n / 1e6:.2f}M"
1135
+ if abs(n) >= 1e3:
1136
+ return f"{n / 1e3:.2f}K"
1137
+ return f"{n:.0f}"
1138
+
1139
+
1140
+ def _format_bytes(n: int) -> str:
1141
+ """Format bytes with KB/MB/GB suffixes."""
1142
+ if n >= 1e9:
1143
+ return f"{n / 1e9:.2f} GB"
1144
+ if n >= 1e6:
1145
+ return f"{n / 1e6:.2f} MB"
1146
+ if n >= 1e3:
1147
+ return f"{n / 1e3:.2f} KB"
1148
+ return f"{n} B"
1149
+
1150
+
1151
+ def generate_compare_html(
1152
+ compare_json: dict[str, Any],
1153
+ output_path: Path | None = None,
1154
+ include_charts: bool = True,
1155
+ ) -> str:
1156
+ """
1157
+ Generate an HTML comparison report with engine summary panel.
1158
+
1159
+ Task 6.4.7: Add engine summary panel (HTML compare report)
1160
+
1161
+ This creates a TRT EngineXplorer-inspired HTML report with:
1162
+ - Engine summary panel (model family, baseline, variants)
1163
+ - Tradeoff visualization
1164
+ - Memory savings analysis
1165
+ - Calibration recommendations
1166
+ """
1167
+ model_family_id = compare_json.get("model_family_id", "unknown_model")
1168
+ baseline_precision = compare_json.get("baseline_precision", "fp32")
1169
+ variants = compare_json.get("variants", [])
1170
+ arch_compatible = compare_json.get("architecture_compatible", True)
1171
+ warnings = compare_json.get("compatibility_warnings", [])
1172
+
1173
+ # Compute analysis
1174
+ analysis = analyze_tradeoffs(compare_json)
1175
+ calib_recs = generate_calibration_recommendations(compare_json)
1176
+ tradeoff_points = compute_tradeoff_points(compare_json)
1177
+
1178
+ # Generate charts as base64 if matplotlib available
1179
+ tradeoff_chart_b64 = ""
1180
+ memory_chart_b64 = ""
1181
+ if include_charts and MATPLOTLIB_AVAILABLE and tradeoff_points:
1182
+ chart_bytes = generate_tradeoff_chart(tradeoff_points)
1183
+ if chart_bytes:
1184
+ tradeoff_chart_b64 = base64.b64encode(chart_bytes).decode("utf-8")
1185
+
1186
+ mem_bytes = generate_memory_savings_chart(compare_json)
1187
+ if mem_bytes:
1188
+ memory_chart_b64 = base64.b64encode(mem_bytes).decode("utf-8")
1189
+
1190
+ # Build HTML
1191
+ html_parts: list[str] = []
1192
+
1193
+ html_parts.append(
1194
+ f"""<!DOCTYPE html>
1195
+ <html lang="en">
1196
+ <head>
1197
+ <meta charset="UTF-8">
1198
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
1199
+ <title>{model_family_id} - Quantization Impact</title>
1200
+ <style>
1201
+ :root {{
1202
+ --bg-deep: #000000;
1203
+ --bg-primary: #0a0a0a;
1204
+ --bg-elevated: #1a1a1a;
1205
+ --bg-card: #252525;
1206
+ --text-primary: rgba(255, 255, 255, 0.92);
1207
+ --text-secondary: rgba(255, 255, 255, 0.55);
1208
+ --accent: #0A84FF;
1209
+ --success: #30D158;
1210
+ --warning: #FFD60A;
1211
+ --error: #FF453A;
1212
+ --border: rgba(255, 255, 255, 0.08);
1213
+ }}
1214
+ * {{ box-sizing: border-box; margin: 0; padding: 0; }}
1215
+ body {{
1216
+ font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', system-ui, sans-serif;
1217
+ background: var(--bg-deep);
1218
+ color: var(--text-primary);
1219
+ line-height: 1.6;
1220
+ padding: 2rem;
1221
+ }}
1222
+ .container {{ max-width: 1400px; margin: 0 auto; }}
1223
+ h1 {{ font-size: 2rem; font-weight: 600; margin-bottom: 0.5rem; }}
1224
+ h2 {{ font-size: 1.3rem; font-weight: 500; margin: 2rem 0 1rem; color: var(--text-primary); }}
1225
+ h3 {{ font-size: 1rem; font-weight: 500; margin: 1.5rem 0 0.75rem; color: var(--text-secondary); }}
1226
+ .subtitle {{ color: var(--text-secondary); margin-bottom: 2rem; }}
1227
+
1228
+ /* Engine Summary Panel */
1229
+ .engine-summary {{
1230
+ background: var(--bg-elevated);
1231
+ border-radius: 12px;
1232
+ padding: 1.5rem;
1233
+ display: grid;
1234
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
1235
+ gap: 1.5rem;
1236
+ border: 1px solid var(--border);
1237
+ }}
1238
+ .summary-item {{ }}
1239
+ .summary-label {{ font-size: 0.75rem; color: var(--text-secondary); text-transform: uppercase; letter-spacing: 0.05em; }}
1240
+ .summary-value {{ font-size: 1.5rem; font-weight: 600; margin-top: 0.25rem; }}
1241
+ .summary-value.accent {{ color: var(--accent); }}
1242
+ .summary-value.success {{ color: var(--success); }}
1243
+
1244
+ /* Variant Cards */
1245
+ .variants-grid {{
1246
+ display: grid;
1247
+ grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
1248
+ gap: 1rem;
1249
+ margin-top: 1rem;
1250
+ }}
1251
+ .variant-card {{
1252
+ background: var(--bg-card);
1253
+ border-radius: 10px;
1254
+ padding: 1.25rem;
1255
+ border: 1px solid var(--border);
1256
+ transition: transform 0.2s, box-shadow 0.2s;
1257
+ }}
1258
+ .variant-card:hover {{
1259
+ transform: translateY(-2px);
1260
+ box-shadow: 0 8px 30px rgba(0,0,0,0.3);
1261
+ }}
1262
+ .variant-card.baseline {{ border-color: var(--accent); }}
1263
+ .variant-header {{
1264
+ display: flex;
1265
+ justify-content: space-between;
1266
+ align-items: center;
1267
+ margin-bottom: 1rem;
1268
+ }}
1269
+ .variant-precision {{
1270
+ font-size: 1.25rem;
1271
+ font-weight: 600;
1272
+ }}
1273
+ .variant-badge {{
1274
+ font-size: 0.7rem;
1275
+ padding: 0.2rem 0.5rem;
1276
+ border-radius: 4px;
1277
+ background: var(--accent);
1278
+ color: white;
1279
+ }}
1280
+ .variant-stats {{ display: grid; grid-template-columns: 1fr 1fr; gap: 0.75rem; }}
1281
+ .stat {{ }}
1282
+ .stat-label {{ font-size: 0.7rem; color: var(--text-secondary); }}
1283
+ .stat-value {{ font-size: 1rem; font-weight: 500; }}
1284
+ .stat-delta {{ font-size: 0.8rem; }}
1285
+ .stat-delta.positive {{ color: var(--success); }}
1286
+ .stat-delta.negative {{ color: var(--error); }}
1287
+
1288
+ /* Charts */
1289
+ .charts-grid {{
1290
+ display: grid;
1291
+ grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
1292
+ gap: 1.5rem;
1293
+ margin-top: 1rem;
1294
+ }}
1295
+ .chart-container {{
1296
+ background: var(--bg-elevated);
1297
+ border-radius: 10px;
1298
+ padding: 1rem;
1299
+ border: 1px solid var(--border);
1300
+ }}
1301
+ .chart-container img {{
1302
+ width: 100%;
1303
+ height: auto;
1304
+ border-radius: 6px;
1305
+ }}
1306
+
1307
+ /* Recommendations */
1308
+ .recommendations {{
1309
+ background: var(--bg-elevated);
1310
+ border-radius: 10px;
1311
+ padding: 1.25rem;
1312
+ border: 1px solid var(--border);
1313
+ }}
1314
+ .rec-item {{
1315
+ display: flex;
1316
+ gap: 0.75rem;
1317
+ padding: 0.75rem 0;
1318
+ border-bottom: 1px solid var(--border);
1319
+ }}
1320
+ .rec-item:last-child {{ border-bottom: none; }}
1321
+ .rec-icon {{ font-size: 1.2rem; }}
1322
+ .rec-text {{ flex: 1; }}
1323
+ .rec-reason {{ font-size: 0.85rem; color: var(--text-secondary); margin-top: 0.25rem; }}
1324
+
1325
+ /* Warnings */
1326
+ .warning-banner {{
1327
+ background: rgba(255, 214, 10, 0.1);
1328
+ border: 1px solid var(--warning);
1329
+ border-radius: 8px;
1330
+ padding: 1rem;
1331
+ margin-bottom: 1.5rem;
1332
+ }}
1333
+ .warning-banner h3 {{ color: var(--warning); margin: 0 0 0.5rem; }}
1334
+ .warning-list {{ list-style: none; }}
1335
+ .warning-list li {{ padding: 0.25rem 0; color: var(--text-secondary); }}
1336
+
1337
+ /* Table */
1338
+ .data-table {{
1339
+ width: 100%;
1340
+ border-collapse: collapse;
1341
+ margin-top: 1rem;
1342
+ }}
1343
+ .data-table th, .data-table td {{
1344
+ padding: 0.75rem;
1345
+ text-align: left;
1346
+ border-bottom: 1px solid var(--border);
1347
+ }}
1348
+ .data-table th {{
1349
+ font-size: 0.75rem;
1350
+ text-transform: uppercase;
1351
+ color: var(--text-secondary);
1352
+ font-weight: 500;
1353
+ }}
1354
+ .data-table tr:hover td {{ background: rgba(255,255,255,0.02); }}
1355
+ </style>
1356
+ </head>
1357
+ <body>
1358
+ <div class="container">
1359
+ <h1>{model_family_id}</h1>
1360
+ <p class="subtitle">Quantization Impact Analysis</p>
1361
+ """
1362
+ )
1363
+
1364
+ # Warning banner if architecture incompatible
1365
+ if not arch_compatible:
1366
+ html_parts.append('<div class="warning-banner">')
1367
+ html_parts.append("<h3>⚠️ Compatibility Warnings</h3>")
1368
+ html_parts.append('<ul class="warning-list">')
1369
+ for warn in warnings:
1370
+ html_parts.append(f"<li>{warn}</li>")
1371
+ html_parts.append("</ul></div>")
1372
+
1373
+ # Engine Summary Panel
1374
+ html_parts.append(
1375
+ """
1376
+ <h2>Engine Summary</h2>
1377
+ <div class="engine-summary">
1378
+ <div class="summary-item">
1379
+ <div class="summary-label">Model Family</div>
1380
+ <div class="summary-value">{model_family}</div>
1381
+ </div>
1382
+ <div class="summary-item">
1383
+ <div class="summary-label">Baseline</div>
1384
+ <div class="summary-value accent">{baseline}</div>
1385
+ </div>
1386
+ <div class="summary-item">
1387
+ <div class="summary-label">Variants</div>
1388
+ <div class="summary-value">{num_variants}</div>
1389
+ </div>
1390
+ <div class="summary-item">
1391
+ <div class="summary-label">Best Balance</div>
1392
+ <div class="summary-value success">{best_balanced}</div>
1393
+ </div>
1394
+ </div>
1395
+ """.format(
1396
+ model_family=model_family_id,
1397
+ baseline=baseline_precision.upper(),
1398
+ num_variants=len(variants),
1399
+ best_balanced=(
1400
+ analysis.get("best_balanced", "N/A").upper()
1401
+ if analysis.get("best_balanced")
1402
+ else "N/A"
1403
+ ),
1404
+ )
1405
+ )
1406
+
1407
+ # Variant Cards
1408
+ html_parts.append("<h2>Variant Comparison</h2>")
1409
+ html_parts.append('<div class="variants-grid">')
1410
+
1411
+ for v in variants:
1412
+ precision = v.get("precision", "unknown")
1413
+ is_baseline = precision == baseline_precision
1414
+ size_bytes = v.get("size_bytes", 0)
1415
+ total_params = v.get("total_params")
1416
+ deltas = v.get("deltas_vs_baseline")
1417
+
1418
+ # Find tradeoff point for this variant
1419
+ tp = next((p for p in tradeoff_points if p.precision == precision), None)
1420
+ speedup = tp.speedup if tp else 1.0
1421
+ acc_delta = tp.accuracy_delta if tp else 0.0
1422
+
1423
+ card_class = "variant-card baseline" if is_baseline else "variant-card"
1424
+ badge = '<span class="variant-badge">BASELINE</span>' if is_baseline else ""
1425
+
1426
+ # Format deltas
1427
+ size_delta_str = ""
1428
+ if deltas and deltas.get("size_bytes"):
1429
+ d = deltas["size_bytes"]
1430
+ pct = (d / size_bytes) * 100 if size_bytes else 0
1431
+ delta_class = "positive" if d < 0 else "negative"
1432
+ size_delta_str = f'<span class="stat-delta {delta_class}">{pct:+.0f}%</span>'
1433
+
1434
+ html_parts.append(
1435
+ f"""
1436
+ <div class="{card_class}">
1437
+ <div class="variant-header">
1438
+ <span class="variant-precision">{precision.upper()}</span>
1439
+ {badge}
1440
+ </div>
1441
+ <div class="variant-stats">
1442
+ <div class="stat">
1443
+ <div class="stat-label">Size</div>
1444
+ <div class="stat-value">{_format_bytes(size_bytes)} {size_delta_str}</div>
1445
+ </div>
1446
+ <div class="stat">
1447
+ <div class="stat-label">Parameters</div>
1448
+ <div class="stat-value">{_format_number(total_params) if total_params else "-"}</div>
1449
+ </div>
1450
+ <div class="stat">
1451
+ <div class="stat-label">Speedup</div>
1452
+ <div class="stat-value">{speedup:.2f}x</div>
1453
+ </div>
1454
+ <div class="stat">
1455
+ <div class="stat-label">Accuracy Δ</div>
1456
+ <div class="stat-value">{acc_delta * 100:+.2f}%</div>
1457
+ </div>
1458
+ </div>
1459
+ </div>
1460
+ """
1461
+ )
1462
+
1463
+ html_parts.append("</div>") # variants-grid
1464
+
1465
+ # Charts
1466
+ if tradeoff_chart_b64 or memory_chart_b64:
1467
+ html_parts.append("<h2>Visualizations</h2>")
1468
+ html_parts.append('<div class="charts-grid">')
1469
+
1470
+ if tradeoff_chart_b64:
1471
+ html_parts.append(
1472
+ f"""
1473
+ <div class="chart-container">
1474
+ <h3>Accuracy vs Speedup Tradeoff</h3>
1475
+ <img src="data:image/png;base64,{tradeoff_chart_b64}" alt="Tradeoff Chart">
1476
+ </div>
1477
+ """
1478
+ )
1479
+
1480
+ if memory_chart_b64:
1481
+ html_parts.append(
1482
+ f"""
1483
+ <div class="chart-container">
1484
+ <h3>Memory & Size Savings</h3>
1485
+ <img src="data:image/png;base64,{memory_chart_b64}" alt="Memory Savings Chart">
1486
+ </div>
1487
+ """
1488
+ )
1489
+
1490
+ html_parts.append("</div>") # charts-grid
1491
+
1492
+ # INT8 Kernel Warning (if INT8 variant present)
1493
+ has_int8 = any(v.get("precision", "").lower() == "int8" for v in variants)
1494
+ if has_int8:
1495
+ html_parts.append(
1496
+ """
1497
+ <div class="callout warning" style="background: #3d2c00; border-left: 4px solid #ffc107; padding: 1rem; margin: 1.5rem 0; border-radius: 4px;">
1498
+ <strong style="color: #ffc107;">INT8 Performance Note</strong>
1499
+ <p style="margin: 0.5rem 0 0 0; color: #e0e0e0;">
1500
+ ONNX Runtime does not have optimized INT8 kernels for GPU execution. The INT8 metrics shown here
1501
+ are based on <strong>CPU inference</strong>, which is significantly slower than GPU.
1502
+ For production INT8 GPU inference, consider converting to <strong>TensorRT</strong> or
1503
+ <strong>OpenVINO</strong> format which have native INT8 GPU support.
1504
+ </p>
1505
+ </div>
1506
+ """
1507
+ )
1508
+
1509
+ # Recommendations
1510
+ html_parts.append("<h2>Recommendations</h2>")
1511
+ html_parts.append('<div class="recommendations">')
1512
+
1513
+ for rec in analysis.get("recommendations", []):
1514
+ html_parts.append(
1515
+ f"""
1516
+ <div class="rec-item">
1517
+ <span class="rec-icon">💡</span>
1518
+ <div class="rec-text">{rec}</div>
1519
+ </div>
1520
+ """
1521
+ )
1522
+
1523
+ html_parts.append("</div>")
1524
+
1525
+ # Calibration Recommendations
1526
+ if calib_recs and any(r.severity != "info" for r in calib_recs):
1527
+ html_parts.append("<h2>Calibration Recommendations</h2>")
1528
+ html_parts.append('<div class="recommendations">')
1529
+
1530
+ for rec in calib_recs:
1531
+ icon = {"info": "i", "warning": "!", "critical": "!!!"}.get(rec.severity, "*")
1532
+ html_parts.append(
1533
+ f"""
1534
+ <div class="rec-item">
1535
+ <span class="rec-icon">{icon}</span>
1536
+ <div class="rec-text">
1537
+ <strong>{rec.recommendation}</strong>
1538
+ <div class="rec-reason">{rec.reason}</div>
1539
+ </div>
1540
+ </div>
1541
+ """
1542
+ )
1543
+
1544
+ html_parts.append("</div>")
1545
+
1546
+ # Footer
1547
+ html_parts.append(
1548
+ """
1549
+ <p style="margin-top: 3rem; color: var(--text-secondary); font-size: 0.85rem; text-align: center;">
1550
+ Generated by HaoLine Compare Mode
1551
+ </p>
1552
+ </div>
1553
+ </body>
1554
+ </html>
1555
+ """
1556
+ )
1557
+
1558
+ html_content = "\n".join(html_parts)
1559
+
1560
+ if output_path:
1561
+ output_path.parent.mkdir(parents=True, exist_ok=True)
1562
+ output_path.write_text(html_content, encoding="utf-8")
1563
+
1564
+ return html_content
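
For orientation, here is a minimal, hypothetical driver for the module added above. It is not part of the wheel; it assumes the package is installed so the file imports as haoline.compare_visualizations, and the compare_json dict uses only keys the module itself reads (baseline_precision, variants, precision, size_bytes, metrics with latency_ms_p50 and accuracy). The model name and numbers are made up for illustration.

# Hypothetical usage sketch (not shipped in the wheel).
from pathlib import Path

from haoline import compare_visualizations as cv

compare_json = {
    "model_family_id": "example_model",   # illustrative name
    "baseline_precision": "fp32",
    "variants": [
        {
            "precision": "fp32",
            "size_bytes": 102_000_000,
            "metrics": {"latency_ms_p50": 12.0, "accuracy": 0.761},
        },
        {
            "precision": "int8",
            "size_bytes": 26_000_000,
            "metrics": {"latency_ms_p50": 5.5, "accuracy": 0.752},
        },
    ],
}

# Trade-off points relative to the fp32 baseline:
# INT8 speedup = 12.0 / 5.5 ≈ 2.18x, accuracy_delta = 0.752 - 0.761 = -0.009
points = cv.compute_tradeoff_points(compare_json)

# Textual recommendations derived from the same points.
for rec in cv.analyze_tradeoffs(compare_json)["recommendations"]:
    print(rec)

if cv.is_available():  # True only when matplotlib is importable
    cv.generate_tradeoff_chart(points, Path("out/tradeoff.png"))
    cv.generate_memory_savings_chart(compare_json, Path("out/memory_savings.png"))

markdown = cv.build_enhanced_markdown(compare_json, assets_dir=Path("out"))
html = cv.generate_compare_html(compare_json, Path("out/compare.html"))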