haoline-0.3.0-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. haoline/.streamlit/config.toml +10 -0
  2. haoline/__init__.py +248 -0
  3. haoline/analyzer.py +935 -0
  4. haoline/cli.py +2712 -0
  5. haoline/compare.py +811 -0
  6. haoline/compare_visualizations.py +1564 -0
  7. haoline/edge_analysis.py +525 -0
  8. haoline/eval/__init__.py +131 -0
  9. haoline/eval/adapters.py +844 -0
  10. haoline/eval/cli.py +390 -0
  11. haoline/eval/comparison.py +542 -0
  12. haoline/eval/deployment.py +633 -0
  13. haoline/eval/schemas.py +833 -0
  14. haoline/examples/__init__.py +15 -0
  15. haoline/examples/basic_inspection.py +74 -0
  16. haoline/examples/compare_models.py +117 -0
  17. haoline/examples/hardware_estimation.py +78 -0
  18. haoline/format_adapters.py +1001 -0
  19. haoline/formats/__init__.py +123 -0
  20. haoline/formats/coreml.py +250 -0
  21. haoline/formats/gguf.py +483 -0
  22. haoline/formats/openvino.py +255 -0
  23. haoline/formats/safetensors.py +273 -0
  24. haoline/formats/tflite.py +369 -0
  25. haoline/hardware.py +2307 -0
  26. haoline/hierarchical_graph.py +462 -0
  27. haoline/html_export.py +1573 -0
  28. haoline/layer_summary.py +769 -0
  29. haoline/llm_summarizer.py +465 -0
  30. haoline/op_icons.py +618 -0
  31. haoline/operational_profiling.py +1492 -0
  32. haoline/patterns.py +1116 -0
  33. haoline/pdf_generator.py +265 -0
  34. haoline/privacy.py +250 -0
  35. haoline/pydantic_models.py +241 -0
  36. haoline/report.py +1923 -0
  37. haoline/report_sections.py +539 -0
  38. haoline/risks.py +521 -0
  39. haoline/schema.py +523 -0
  40. haoline/streamlit_app.py +2024 -0
  41. haoline/tests/__init__.py +4 -0
  42. haoline/tests/conftest.py +123 -0
  43. haoline/tests/test_analyzer.py +868 -0
  44. haoline/tests/test_compare_visualizations.py +293 -0
  45. haoline/tests/test_edge_analysis.py +243 -0
  46. haoline/tests/test_eval.py +604 -0
  47. haoline/tests/test_format_adapters.py +460 -0
  48. haoline/tests/test_hardware.py +237 -0
  49. haoline/tests/test_hardware_recommender.py +90 -0
  50. haoline/tests/test_hierarchical_graph.py +326 -0
  51. haoline/tests/test_html_export.py +180 -0
  52. haoline/tests/test_layer_summary.py +428 -0
  53. haoline/tests/test_llm_patterns.py +540 -0
  54. haoline/tests/test_llm_summarizer.py +339 -0
  55. haoline/tests/test_patterns.py +774 -0
  56. haoline/tests/test_pytorch.py +327 -0
  57. haoline/tests/test_report.py +383 -0
  58. haoline/tests/test_risks.py +398 -0
  59. haoline/tests/test_schema.py +417 -0
  60. haoline/tests/test_tensorflow.py +380 -0
  61. haoline/tests/test_visualizations.py +316 -0
  62. haoline/universal_ir.py +856 -0
  63. haoline/visualizations.py +1086 -0
  64. haoline/visualize_yolo.py +44 -0
  65. haoline/web.py +110 -0
  66. haoline-0.3.0.dist-info/METADATA +471 -0
  67. haoline-0.3.0.dist-info/RECORD +70 -0
  68. haoline-0.3.0.dist-info/WHEEL +4 -0
  69. haoline-0.3.0.dist-info/entry_points.txt +5 -0
  70. haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/tests/test_eval.py
@@ -0,0 +1,604 @@
+"""Tests for the eval module: schemas, adapters, and linking utilities."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import pytest
+
+from haoline.eval.comparison import (
+    ModelComparisonRow,
+    ModelComparisonTable,
+    compare_models,
+    generate_eval_metrics_html,
+)
+from haoline.eval.deployment import (
+    DeploymentScenario,
+    DeploymentTarget,
+    calculate_deployment_cost,
+    estimate_latency_from_flops,
+    get_hardware_tier,
+    list_hardware_tiers,
+    select_hardware_tier_for_latency,
+)
+from haoline.eval.schemas import (
+    CombinedReport,
+    DetectionEvalResult,
+    EvalMetric,
+    EvalResult,
+    compute_model_hash,
+    create_combined_report,
+    link_eval_to_model,
+    validate_eval_result,
+)
+
+
+class TestEvalMetric:
+    """Tests for EvalMetric Pydantic model."""
+
+    def test_create_metric(self) -> None:
+        """Test creating an EvalMetric."""
+        metric = EvalMetric(
+            name="accuracy",
+            value=95.5,
+            unit="%",
+            higher_is_better=True,
+            category="accuracy",
+        )
+        assert metric.name == "accuracy"
+        assert metric.value == 95.5
+        assert metric.unit == "%"
+        assert metric.higher_is_better is True
+
+    def test_metric_json_serialization(self) -> None:
+        """Test EvalMetric serialization."""
+        metric = EvalMetric(
+            name="loss",
+            value=0.05,
+            unit="",
+            higher_is_better=False,
+            category="loss",
+        )
+        data = json.loads(metric.model_dump_json())
+        assert data["name"] == "loss"
+        assert data["higher_is_better"] is False
+
+
+class TestEvalResult:
+    """Tests for EvalResult base class."""
+
+    def test_create_eval_result(self) -> None:
+        """Test creating an EvalResult."""
+        result = EvalResult(
+            model_id="test-model",
+            task_type="classification",
+            dataset="imagenet",
+            metrics=[
+                EvalMetric(
+                    name="top1",
+                    value=76.5,
+                    unit="%",
+                    higher_is_better=True,
+                    category="accuracy",
+                )
+            ],
+        )
+        assert result.model_id == "test-model"
+        assert result.task_type == "classification"
+        assert len(result.metrics) == 1
+
+    def test_to_json(self) -> None:
+        """Test JSON serialization."""
+        result = EvalResult(
+            model_id="model",
+            task_type="detection",
+            metrics=[],
+        )
+        json_str = result.to_json()
+        data = json.loads(json_str)
+        assert data["model_id"] == "model"
+        assert data["task_type"] == "detection"
+
+
+class TestDetectionEvalResult:
+    """Tests for detection-specific eval result."""
+
+    def test_create_with_factory(self) -> None:
+        """Test using the create() convenience method."""
+        result = DetectionEvalResult.create(
+            model_id="yolov8n",
+            dataset="coco",
+            map50=0.65,
+            map50_95=0.48,
+            precision=0.72,
+            recall=0.68,
+            f1=0.70,
+        )
+        assert result.model_id == "yolov8n"
+        assert result.dataset == "coco"
+        assert len(result.metrics) == 5
+
+
+class TestLinkingUtilities:
+    """Tests for model-eval linking functions."""
+
+    def test_compute_model_hash(self, tmp_path: Path) -> None:
+        """Test computing file hash."""
+        # Create a temporary file
+        test_file = tmp_path / "model.onnx"
+        test_file.write_bytes(b"fake model content")
+
+        hash_result = compute_model_hash(str(test_file))
+        assert len(hash_result) == 64  # SHA-256 hex length
+        assert hash_result.isalnum()
+
+    def test_compute_model_hash_not_found(self) -> None:
+        """Test hash of non-existent file raises error."""
+        with pytest.raises(FileNotFoundError):
+            compute_model_hash("/nonexistent/path/model.onnx")
+
+    def test_link_eval_to_model(self, tmp_path: Path) -> None:
+        """Test linking eval result to model file."""
+        # Create a temporary model file
+        model_file = tmp_path / "yolov8n.onnx"
+        model_file.write_bytes(b"model content")
+
+        result = EvalResult(
+            model_id="",
+            task_type="detection",
+            metrics=[],
+        )
+
+        linked = link_eval_to_model(str(model_file), result, use_hash=False)
+        assert linked.model_id == "yolov8n"
+        assert "linked_model_path" in linked.metadata
+
+    def test_link_eval_to_model_with_hash(self, tmp_path: Path) -> None:
+        """Test linking with hash-based model ID."""
+        model_file = tmp_path / "model.onnx"
+        model_file.write_bytes(b"unique content")
+
+        result = EvalResult(
+            model_id="",
+            task_type="classification",
+            metrics=[],
+        )
+
+        linked = link_eval_to_model(str(model_file), result, use_hash=True)
+        assert len(linked.model_id) == 12  # Short hash
+
+    def test_create_combined_report_no_inspection(self, tmp_path: Path) -> None:
+        """Test creating combined report without running inspection."""
+        model_file = tmp_path / "model.onnx"
+        model_file.write_bytes(b"model")
+
+        eval_result = DetectionEvalResult.create(
+            model_id="",
+            dataset="coco",
+            map50=0.65,
+            map50_95=0.48,
+            precision=0.72,
+            recall=0.68,
+            f1=0.70,
+        )
+
+        combined = create_combined_report(
+            str(model_file),
+            eval_results=[eval_result],
+            run_inspection=False,
+        )
+
+        assert combined.model_id == "model"
+        assert len(combined.eval_results) == 1
+        assert combined.eval_results[0].model_id == "model"
+
+
+class TestValidation:
+    """Tests for schema validation."""
+
+    def test_validate_valid_eval_result(self) -> None:
+        """Test validation of valid data."""
+        data = {
+            "model_id": "test",
+            "task_type": "classification",
+            "metrics": [],
+        }
+        assert validate_eval_result(data) is True
+
+    def test_validate_invalid_eval_result(self) -> None:
+        """Test validation of invalid data."""
+        data = {"invalid": "data"}
+        assert validate_eval_result(data) is False
+
+
+class TestCombinedReport:
+    """Tests for CombinedReport model."""
+
+    def test_create_combined_report(self) -> None:
+        """Test creating a CombinedReport manually."""
+        combined = CombinedReport(
+            model_id="resnet50",
+            model_path="/path/to/resnet50.onnx",
+            architecture={
+                "params_total": 25_000_000,
+                "flops_total": 4_000_000_000,
+            },
+            eval_results=[],
+        )
+        assert combined.model_id == "resnet50"
+        assert combined.architecture["params_total"] == 25_000_000
+
+    def test_add_eval_result(self) -> None:
+        """Test adding eval results to combined report."""
+        combined = CombinedReport(
+            model_id="model",
+            architecture={},
+        )
+        eval_result = EvalResult(
+            model_id="model",
+            task_type="classification",
+            metrics=[],
+        )
+        combined.add_eval_result(eval_result)
+        assert len(combined.eval_results) == 1
+
+    def test_get_eval_by_task(self) -> None:
+        """Test retrieving eval by task type."""
+        combined = CombinedReport(
+            model_id="model",
+            architecture={},
+            eval_results=[
+                EvalResult(model_id="m", task_type="detection", metrics=[]),
+                EvalResult(model_id="m", task_type="classification", metrics=[]),
+            ],
+        )
+        det = combined.get_eval_by_task("detection")
+        assert det is not None
+        assert det.task_type == "detection"
+
+        missing = combined.get_eval_by_task("segmentation")
+        assert missing is None
+
+
+# =============================================================================
+# Deployment Cost Calculator Tests
+# =============================================================================
+
+
+class TestDeploymentScenario:
+    """Tests for DeploymentScenario dataclass."""
+
+    def test_default_scenario(self) -> None:
+        """Test creating scenario with defaults."""
+        scenario = DeploymentScenario()
+        assert scenario.target_fps == 30.0
+        assert scenario.hours_per_day == 24.0
+        assert scenario.target == DeploymentTarget.CLOUD_GPU
+
+    def test_realtime_video_preset(self) -> None:
+        """Test realtime video preset."""
+        scenario = DeploymentScenario.realtime_video(fps=60.0)
+        assert scenario.target_fps == 60.0
+        assert scenario.max_latency_ms == pytest.approx(1000.0 / 60, rel=0.01)
+        assert scenario.name == "realtime_video"
+
+    def test_edge_device_preset(self) -> None:
+        """Test edge device preset."""
+        scenario = DeploymentScenario.edge_device(fps=10.0)
+        assert scenario.target == DeploymentTarget.EDGE_GPU
+        assert scenario.target_fps == 10.0
+
+    def test_serialization(self) -> None:
+        """Test to_dict and from_dict."""
+        original = DeploymentScenario(
+            target_fps=15.0,
+            hours_per_day=8.0,
+            precision="fp16",
+        )
+        data = original.to_dict()
+        restored = DeploymentScenario.from_dict(data)
+        assert restored.target_fps == 15.0
+        assert restored.hours_per_day == 8.0
+        assert restored.precision == "fp16"
+
+
+class TestHardwareTiers:
+    """Tests for hardware tier lookups."""
+
+    def test_get_hardware_tier(self) -> None:
+        """Test getting a tier by name."""
+        tier = get_hardware_tier("t4")
+        assert tier is not None
+        assert tier.name == "T4"
+        assert tier.cost_per_hour_usd > 0
+
+    def test_get_hardware_tier_case_insensitive(self) -> None:
+        """Test case-insensitive lookup."""
+        tier = get_hardware_tier("A10G")
+        assert tier is not None
+        assert tier.name == "A10G"
+
+    def test_get_unknown_tier(self) -> None:
+        """Test getting non-existent tier returns None."""
+        tier = get_hardware_tier("nonexistent")
+        assert tier is None
+
+    def test_list_hardware_tiers(self) -> None:
+        """Test listing all tiers."""
+        tiers = list_hardware_tiers()
+        assert len(tiers) > 0
+        # Should be sorted by cost
+        costs = [t.cost_per_hour_usd for t in tiers]
+        assert costs == sorted(costs)
+
+    def test_list_hardware_tiers_filtered(self) -> None:
+        """Test filtering by target."""
+        gpu_tiers = list_hardware_tiers(DeploymentTarget.CLOUD_GPU)
+        for tier in gpu_tiers:
+            assert tier.target == DeploymentTarget.CLOUD_GPU
+
+        edge_tiers = list_hardware_tiers(DeploymentTarget.EDGE_GPU)
+        for tier in edge_tiers:
+            assert tier.target == DeploymentTarget.EDGE_GPU
+
+
+class TestCostCalculation:
+    """Tests for deployment cost calculation."""
+
+    def test_estimate_latency_from_flops(self) -> None:
+        """Test latency estimation."""
+        tier = get_hardware_tier("t4")
+        assert tier is not None
+
+        # 1 GFLOP model
+        flops = 1_000_000_000
+        latency = estimate_latency_from_flops(flops, tier, "fp32")
+
+        # Should be a reasonable latency value
+        assert latency > 0
+        assert latency < 10000  # Less than 10 seconds
+
+    def test_select_hardware_for_latency(self) -> None:
+        """Test hardware selection based on latency SLA."""
+        flops = 10_000_000_000  # 10 GFLOP model
+
+        # Strict latency requirement - should pick faster hardware
+        tier = select_hardware_tier_for_latency(
+            flops,
+            target_latency_ms=10.0,
+            precision="fp16",
+        )
+        # May or may not find suitable tier
+        if tier:
+            assert tier.cost_per_hour_usd > 0
+
+    def test_calculate_deployment_cost(self) -> None:
+        """Test full cost calculation."""
+        scenario = DeploymentScenario(
+            target_fps=10.0,
+            hours_per_day=8.0,
+            days_per_month=22,  # Business days
+            target=DeploymentTarget.CLOUD_GPU,
+        )
+
+        flops = 5_000_000_000  # 5 GFLOP model
+        estimate = calculate_deployment_cost(flops, scenario)
+
+        # Check basic fields are populated
+        assert estimate.hardware_tier is not None
+        assert estimate.cost_per_hour_usd >= 0
+        assert estimate.cost_per_day_usd >= 0
+        assert estimate.cost_per_month_usd >= 0
+        assert estimate.estimated_latency_ms > 0
+
+        # Costs should scale correctly
+        assert estimate.cost_per_day_usd == pytest.approx(
+            estimate.cost_per_hour_usd * 8.0, rel=0.01
+        )
+        assert estimate.cost_per_month_usd == pytest.approx(
+            estimate.cost_per_day_usd * 22, rel=0.01
+        )
+
+    def test_cost_estimate_summary(self) -> None:
+        """Test human-readable summary generation."""
+        scenario = DeploymentScenario(
+            target_fps=30.0,
+            hours_per_day=24.0,
+            name="test_scenario",
+        )
+
+        estimate = calculate_deployment_cost(1_000_000_000, scenario)
+        summary = estimate.summary()
+
+        assert "test_scenario" in summary
+        assert "Per hour:" in summary
+        assert "Per month:" in summary
+
+
+# =============================================================================
+# Model Comparison Tests
+# =============================================================================
+
+
+class TestModelComparison:
+    """Tests for multi-model comparison functionality."""
+
+    def test_create_comparison_row(self) -> None:
+        """Test creating a comparison row from combined report."""
+        report = CombinedReport(
+            model_id="yolov8n",
+            model_path="/path/to/yolov8n.onnx",
+            architecture={
+                "params_total": 3_000_000,
+                "flops_total": 8_000_000_000,
+                "model_size_bytes": 12 * 1024 * 1024,
+            },
+            primary_accuracy_metric="mAP@50",
+            primary_accuracy_value=65.0,
+        )
+
+        row = ModelComparisonRow.from_combined_report(report)
+
+        assert row.model_id == "yolov8n"
+        assert row.params_total == 3_000_000
+        assert row.flops_total == 8_000_000_000
+        assert row.model_size_mb == pytest.approx(12.0, rel=0.01)
+        assert row.primary_metric_value == 65.0
+
+    def test_comparison_table(self) -> None:
+        """Test creating and populating a comparison table."""
+        report1 = CombinedReport(
+            model_id="model_a",
+            architecture={"params_total": 1_000_000, "flops_total": 1e9},
+            primary_accuracy_metric="accuracy",
+            primary_accuracy_value=90.0,
+        )
+        report2 = CombinedReport(
+            model_id="model_b",
+            architecture={"params_total": 5_000_000, "flops_total": 5e9},
+            primary_accuracy_metric="accuracy",
+            primary_accuracy_value=95.0,
+        )
+
+        table = ModelComparisonTable(title="Test Comparison")
+        table.add_model(report1)
+        table.add_model(report2)
+
+        assert len(table.rows) == 2
+        assert table.rows[0].model_id == "model_a"
+        assert table.rows[1].model_id == "model_b"
+
+    def test_compare_models_function(self) -> None:
+        """Test the compare_models() convenience function."""
+        reports = [
+            CombinedReport(
+                model_id="small",
+                architecture={"params_total": 1_000_000},
+                primary_accuracy_value=80.0,
+            ),
+            CombinedReport(
+                model_id="medium",
+                architecture={"params_total": 10_000_000},
+                primary_accuracy_value=90.0,
+            ),
+            CombinedReport(
+                model_id="large",
+                architecture={"params_total": 100_000_000},
+                primary_accuracy_value=95.0,
+            ),
+        ]
+
+        table = compare_models(
+            reports,
+            sort_by="primary_metric_value",
+            sort_descending=True,
+        )
+
+        assert len(table.rows) == 3
+        # Should be sorted by accuracy descending
+        assert table.rows[0].model_id == "large"
+        assert table.rows[1].model_id == "medium"
+        assert table.rows[2].model_id == "small"
+
+    def test_table_to_csv(self) -> None:
+        """Test CSV export."""
+        report = CombinedReport(
+            model_id="test_model",
+            architecture={"params_total": 1_000_000},
+        )
+        table = ModelComparisonTable()
+        table.add_model(report)
+
+        csv_output = table.to_csv()
+        assert "model_id" in csv_output
+        assert "test_model" in csv_output
+
+    def test_table_to_json(self) -> None:
+        """Test JSON export."""
+        report = CombinedReport(
+            model_id="test_model",
+            architecture={"params_total": 1_000_000},
+        )
+        table = ModelComparisonTable(title="JSON Test")
+        table.add_model(report)
+
+        json_output = table.to_json()
+        data = json.loads(json_output)
+
+        assert data["title"] == "JSON Test"
+        assert len(data["rows"]) == 1
+        assert data["rows"][0]["model_id"] == "test_model"
+
+    def test_table_to_markdown(self) -> None:
+        """Test Markdown export."""
+        report = CombinedReport(
+            model_id="model_a",
+            architecture={"params_total": 3_000_000, "flops_total": 8e9},
+            primary_accuracy_value=75.5,
+        )
+        table = ModelComparisonTable(title="MD Test")
+        table.add_model(report)
+
+        md_output = table.to_markdown()
+
+        assert "## MD Test" in md_output
+        assert "| Model |" in md_output
+        assert "model_a" in md_output
+        assert "3.0M" in md_output
+        assert "75.5%" in md_output
+
+    def test_table_to_console(self) -> None:
+        """Test console table output."""
+        report = CombinedReport(
+            model_id="console_test",
+            architecture={"params_total": 2_000_000},
+        )
+        table = ModelComparisonTable(title="Console Test")
+        table.add_model(report)
+
+        console_output = table.to_console()
+
+        assert "Console Test" in console_output
+        assert "console_test" in console_output
+
+    def test_generate_eval_metrics_html(self) -> None:
+        """Test HTML generation for eval metrics."""
+        eval_result = EvalResult(
+            model_id="test",
+            task_type="classification",
+            metrics=[
+                EvalMetric(
+                    name="accuracy",
+                    value=95.5,
+                    unit="%",
+                    higher_is_better=True,
+                    category="accuracy",
+                ),
+                EvalMetric(
+                    name="f1",
+                    value=0.93,
+                    unit="",
+                    higher_is_better=True,
+                    category="accuracy",
+                ),
+            ],
+        )
+
+        html = generate_eval_metrics_html([eval_result])
+
+        assert '<section class="eval-metrics">' in html
+        assert "accuracy" in html
+        assert "95.5%" in html
+        assert "classification" in html
+
+    def test_generate_eval_metrics_html_with_cost(self) -> None:
+        """Test HTML generation includes cost estimate."""
+        scenario = DeploymentScenario(target_fps=30.0)
+        cost_estimate = calculate_deployment_cost(1_000_000_000, scenario)
+
+        html = generate_eval_metrics_html([], cost_estimate)
+
+        assert "Deployment Cost Estimate" in html
+        assert "$/Month" in html
+        assert cost_estimate.hardware_tier.name in html
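
Taken together, the tests above exercise an end-to-end workflow for haoline.eval: record eval metrics, link them to a model file, estimate deployment cost, and compare models. The Python sketch below is assembled only from calls that appear in the tests; the model path, the 1 GFLOP fallback, and the assumption that the report's architecture field is a dict are illustrative, not documented behavior of the package.

# Minimal usage sketch based on the API surface exercised in test_eval.py.
from haoline.eval.comparison import compare_models
from haoline.eval.deployment import DeploymentScenario, calculate_deployment_cost
from haoline.eval.schemas import DetectionEvalResult, create_combined_report

# Record detection metrics; create_combined_report fills model_id from the file name.
eval_result = DetectionEvalResult.create(
    model_id="",
    dataset="coco",
    map50=0.65,
    map50_95=0.48,
    precision=0.72,
    recall=0.68,
    f1=0.70,
)
combined = create_combined_report(
    "yolov8n.onnx",              # hypothetical model path
    eval_results=[eval_result],
    run_inspection=False,        # skip architecture analysis
)

# Estimate serving cost at 30 FPS, 24 hours/day; fall back to 1 GFLOP
# if no architecture data was collected (illustrative default).
scenario = DeploymentScenario(target_fps=30.0, hours_per_day=24.0)
flops = (combined.architecture or {}).get("flops_total", 1_000_000_000)
print(calculate_deployment_cost(flops, scenario).summary())

# Rank one or more combined reports and render a Markdown comparison table.
table = compare_models([combined], sort_by="primary_metric_value", sort_descending=True)
print(table.to_markdown())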