haoline-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haoline/.streamlit/config.toml +10 -0
- haoline/__init__.py +248 -0
- haoline/analyzer.py +935 -0
- haoline/cli.py +2712 -0
- haoline/compare.py +811 -0
- haoline/compare_visualizations.py +1564 -0
- haoline/edge_analysis.py +525 -0
- haoline/eval/__init__.py +131 -0
- haoline/eval/adapters.py +844 -0
- haoline/eval/cli.py +390 -0
- haoline/eval/comparison.py +542 -0
- haoline/eval/deployment.py +633 -0
- haoline/eval/schemas.py +833 -0
- haoline/examples/__init__.py +15 -0
- haoline/examples/basic_inspection.py +74 -0
- haoline/examples/compare_models.py +117 -0
- haoline/examples/hardware_estimation.py +78 -0
- haoline/format_adapters.py +1001 -0
- haoline/formats/__init__.py +123 -0
- haoline/formats/coreml.py +250 -0
- haoline/formats/gguf.py +483 -0
- haoline/formats/openvino.py +255 -0
- haoline/formats/safetensors.py +273 -0
- haoline/formats/tflite.py +369 -0
- haoline/hardware.py +2307 -0
- haoline/hierarchical_graph.py +462 -0
- haoline/html_export.py +1573 -0
- haoline/layer_summary.py +769 -0
- haoline/llm_summarizer.py +465 -0
- haoline/op_icons.py +618 -0
- haoline/operational_profiling.py +1492 -0
- haoline/patterns.py +1116 -0
- haoline/pdf_generator.py +265 -0
- haoline/privacy.py +250 -0
- haoline/pydantic_models.py +241 -0
- haoline/report.py +1923 -0
- haoline/report_sections.py +539 -0
- haoline/risks.py +521 -0
- haoline/schema.py +523 -0
- haoline/streamlit_app.py +2024 -0
- haoline/tests/__init__.py +4 -0
- haoline/tests/conftest.py +123 -0
- haoline/tests/test_analyzer.py +868 -0
- haoline/tests/test_compare_visualizations.py +293 -0
- haoline/tests/test_edge_analysis.py +243 -0
- haoline/tests/test_eval.py +604 -0
- haoline/tests/test_format_adapters.py +460 -0
- haoline/tests/test_hardware.py +237 -0
- haoline/tests/test_hardware_recommender.py +90 -0
- haoline/tests/test_hierarchical_graph.py +326 -0
- haoline/tests/test_html_export.py +180 -0
- haoline/tests/test_layer_summary.py +428 -0
- haoline/tests/test_llm_patterns.py +540 -0
- haoline/tests/test_llm_summarizer.py +339 -0
- haoline/tests/test_patterns.py +774 -0
- haoline/tests/test_pytorch.py +327 -0
- haoline/tests/test_report.py +383 -0
- haoline/tests/test_risks.py +398 -0
- haoline/tests/test_schema.py +417 -0
- haoline/tests/test_tensorflow.py +380 -0
- haoline/tests/test_visualizations.py +316 -0
- haoline/universal_ir.py +856 -0
- haoline/visualizations.py +1086 -0
- haoline/visualize_yolo.py +44 -0
- haoline/web.py +110 -0
- haoline-0.3.0.dist-info/METADATA +471 -0
- haoline-0.3.0.dist-info/RECORD +70 -0
- haoline-0.3.0.dist-info/WHEEL +4 -0
- haoline-0.3.0.dist-info/entry_points.txt +5 -0
- haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,542 @@
"""
Multi-Model Comparison Module for HaoLine.

Generate comparison tables and reports across multiple models with:
- Architecture metrics (params, FLOPs, memory)
- Evaluation metrics (accuracy, mAP, etc.)
- Hardware estimates (latency, throughput)
- Deployment costs

Supports export to:
- Console tables (rich formatting)
- CSV/JSON for further analysis
- HTML for reports
"""

from __future__ import annotations

import csv
import json
from dataclasses import dataclass, field
from io import StringIO
from pathlib import Path
from typing import Any

from .deployment import (
    DeploymentCostEstimate,
    DeploymentScenario,
    calculate_deployment_cost,
)
from .schemas import CombinedReport


@dataclass
class ModelComparisonRow:
    """
    A single row in the comparison table.

    Represents one model with all its metrics for comparison.
    """

    model_id: str
    model_path: str = ""

    # Architecture metrics
    params_total: int = 0
    flops_total: int = 0
    model_size_mb: float = 0.0

    # Primary accuracy metric (task-dependent)
    primary_metric_name: str = ""
    primary_metric_value: float = 0.0

    # Speed metrics
    latency_ms: float = 0.0
    throughput_fps: float = 0.0
    hardware_tier: str = ""

    # Cost metrics
    cost_per_month_usd: float = 0.0
    cost_per_1k_inferences_usd: float = 0.0

    # Additional metrics (for detailed comparison)
    extra_metrics: dict[str, float] = field(default_factory=dict)

    # Source data references
    combined_report: CombinedReport | None = None
    cost_estimate: DeploymentCostEstimate | None = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "model_id": self.model_id,
            "model_path": self.model_path,
            "params_total": self.params_total,
            "flops_total": self.flops_total,
            "model_size_mb": self.model_size_mb,
            "primary_metric_name": self.primary_metric_name,
            "primary_metric_value": self.primary_metric_value,
            "latency_ms": self.latency_ms,
            "throughput_fps": self.throughput_fps,
            "hardware_tier": self.hardware_tier,
            "cost_per_month_usd": self.cost_per_month_usd,
            "cost_per_1k_inferences_usd": self.cost_per_1k_inferences_usd,
            "extra_metrics": self.extra_metrics,
        }

    @classmethod
    def from_combined_report(
        cls,
        report: CombinedReport,
        cost_estimate: DeploymentCostEstimate | None = None,
    ) -> ModelComparisonRow:
        """
        Create a comparison row from a CombinedReport.

        Args:
            report: CombinedReport with architecture and eval data.
            cost_estimate: Optional pre-computed cost estimate.

        Returns:
            ModelComparisonRow with extracted metrics.
        """
        arch = report.architecture
        row = cls(
            model_id=report.model_id,
            model_path=report.model_path,
            params_total=arch.get("params_total", 0),
            flops_total=arch.get("flops_total", 0),
            model_size_mb=arch.get("model_size_bytes", 0) / (1024 * 1024),
            primary_metric_name=report.primary_accuracy_metric,
            primary_metric_value=report.primary_accuracy_value,
            latency_ms=report.latency_ms,
            throughput_fps=report.throughput_fps,
            hardware_tier=report.hardware_profile,
            combined_report=report,
        )

        # Add cost if available
        if cost_estimate:
            row.cost_per_month_usd = cost_estimate.cost_per_month_usd
            row.cost_per_1k_inferences_usd = cost_estimate.cost_per_1k_inferences_usd
            row.cost_estimate = cost_estimate
            if not row.hardware_tier:
                row.hardware_tier = cost_estimate.hardware_tier.name

        # Extract extra metrics from eval results
        for eval_result in report.eval_results:
            for metric in eval_result.metrics:
                if metric.name != row.primary_metric_name:
                    row.extra_metrics[metric.name] = metric.value

        return row


@dataclass
class ModelComparisonTable:
    """
    Multi-model comparison table.

    Holds comparison data for multiple models and provides
    various output formats.
    """

    rows: list[ModelComparisonRow] = field(default_factory=list)
    scenario: DeploymentScenario | None = None

    # Table metadata
    title: str = "Model Comparison"
    description: str = ""

    def add_model(
        self,
        report: CombinedReport,
        scenario: DeploymentScenario | None = None,
    ) -> ModelComparisonRow:
        """
        Add a model to the comparison table.

        Args:
            report: CombinedReport with model data.
            scenario: Optional deployment scenario for cost calculation.

        Returns:
            The created ModelComparisonRow.
        """
        # Calculate cost if scenario provided
        cost_estimate = None
        if scenario and report.architecture.get("flops_total"):
            cost_estimate = calculate_deployment_cost(
                report.architecture["flops_total"],
                scenario,
                report.architecture.get("model_size_bytes", 0),
            )

        row = ModelComparisonRow.from_combined_report(report, cost_estimate)
        self.rows.append(row)
        return row

    def sort_by(
        self,
        key: str,
        reverse: bool = False,
    ) -> None:
        """
        Sort the table by a given metric.

        Args:
            key: Attribute name to sort by (e.g., 'primary_metric_value', 'cost_per_month_usd').
            reverse: If True, sort in descending order.
        """
        self.rows.sort(key=lambda r: getattr(r, key, 0), reverse=reverse)

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "title": self.title,
            "description": self.description,
            "scenario": self.scenario.to_dict() if self.scenario else None,
            "rows": [row.to_dict() for row in self.rows],
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize to JSON string."""
        return json.dumps(self.to_dict(), indent=indent)

    def to_csv(self) -> str:
        """
        Export comparison table to CSV format.

        Returns:
            CSV string with all comparison metrics.
        """
        if not self.rows:
            return ""

        output = StringIO()
        fieldnames = [
            "model_id",
            "params_total",
            "flops_total",
            "model_size_mb",
            "primary_metric_name",
            "primary_metric_value",
            "latency_ms",
            "throughput_fps",
            "hardware_tier",
            "cost_per_month_usd",
            "cost_per_1k_inferences_usd",
        ]

        writer = csv.DictWriter(output, fieldnames=fieldnames, extrasaction="ignore")
        writer.writeheader()
        for row in self.rows:
            writer.writerow(row.to_dict())

        return output.getvalue()

    def to_markdown(self) -> str:
        """
        Generate a markdown table for display.

        Returns:
            Markdown-formatted comparison table.
        """
        if not self.rows:
            return "*No models to compare*"

        lines = []

        # Title
        if self.title:
            lines.append(f"## {self.title}")
            lines.append("")

        if self.description:
            lines.append(self.description)
            lines.append("")

        # Table header
        lines.append(
            "| Model | Params | FLOPs | Size | Accuracy | Latency | Throughput | $/Month |"
        )
        lines.append(
            "|-------|--------|-------|------|----------|---------|------------|---------|"
        )

        # Table rows
        for row in self.rows:
            params_str = _format_number(row.params_total)
            flops_str = _format_number(row.flops_total)
            size_str = f"{row.model_size_mb:.1f} MB"
            acc_str = f"{row.primary_metric_value:.1f}%" if row.primary_metric_value else "N/A"
            lat_str = f"{row.latency_ms:.1f} ms" if row.latency_ms else "N/A"
            thr_str = f"{row.throughput_fps:.1f} fps" if row.throughput_fps else "N/A"
            cost_str = f"${row.cost_per_month_usd:.0f}" if row.cost_per_month_usd else "N/A"

            lines.append(
                f"| {row.model_id} | {params_str} | {flops_str} | {size_str} | "
                f"{acc_str} | {lat_str} | {thr_str} | {cost_str} |"
            )

        return "\n".join(lines)

    def to_console(self) -> str:
        """
        Generate a console-friendly table with rich formatting.

        Returns:
            Formatted table string for terminal output.
        """
        if not self.rows:
            return "No models to compare"

        # Calculate column widths
        headers = ["Model", "Params", "FLOPs", "Size", "Accuracy", "Latency", "$/Month"]
        rows_data = []

        for row in self.rows:
            rows_data.append(
                [
                    row.model_id[:20],  # Truncate long names
                    _format_number(row.params_total),
                    _format_number(row.flops_total),
                    f"{row.model_size_mb:.1f}MB",
                    f"{row.primary_metric_value:.1f}%" if row.primary_metric_value else "N/A",
                    f"{row.latency_ms:.1f}ms" if row.latency_ms else "N/A",
                    f"${row.cost_per_month_usd:.0f}" if row.cost_per_month_usd else "N/A",
                ]
            )

        # Calculate column widths
        widths = [len(h) for h in headers]
        for rd in rows_data:
            for i, cell in enumerate(rd):
                widths[i] = max(widths[i], len(cell))

        # Build table
        def row_str(cells: list[str]) -> str:
            return " | ".join(c.ljust(widths[i]) for i, c in enumerate(cells))

        lines = [
            f"\n{self.title}",
            "=" * (sum(widths) + len(widths) * 3),
            row_str(headers),
            "-" * (sum(widths) + len(widths) * 3),
        ]
        for rd in rows_data:
            lines.append(row_str(rd))
        lines.append("=" * (sum(widths) + len(widths) * 3))

        return "\n".join(lines)

    def save_csv(self, path: str | Path) -> None:
        """Save comparison to CSV file."""
        Path(path).write_text(self.to_csv(), encoding="utf-8")

    def save_json(self, path: str | Path) -> None:
        """Save comparison to JSON file."""
        Path(path).write_text(self.to_json(), encoding="utf-8")


def _format_number(n: int | float) -> str:
    """Format large numbers with K/M/B suffixes."""
    if n >= 1e9:
        return f"{n / 1e9:.1f}B"
    elif n >= 1e6:
        return f"{n / 1e6:.1f}M"
    elif n >= 1e3:
        return f"{n / 1e3:.1f}K"
    else:
        return str(int(n))


def compare_models(
    reports: list[CombinedReport],
    scenario: DeploymentScenario | None = None,
    sort_by: str = "primary_metric_value",
    sort_descending: bool = True,
    title: str = "Model Comparison",
) -> ModelComparisonTable:
    """
    Compare multiple models and generate a comparison table.

    This is the main entry point for model comparison.

    Args:
        reports: List of CombinedReport objects to compare.
        scenario: Optional deployment scenario for cost calculation.
        sort_by: Metric to sort by (default: primary accuracy).
        sort_descending: Sort order (default: highest first).
        title: Table title.

    Returns:
        ModelComparisonTable with all models.

    Example:
        >>> reports = [report1, report2, report3]
        >>> scenario = DeploymentScenario.realtime_video(fps=30)
        >>> table = compare_models(reports, scenario)
        >>> print(table.to_console())
    """
    table = ModelComparisonTable(
        title=title,
        scenario=scenario,
    )

    for report in reports:
        table.add_model(report, scenario)

    if sort_by:
        table.sort_by(sort_by, reverse=sort_descending)

    return table


def generate_eval_metrics_html(
    eval_results: list[Any],  # List of EvalResult
    cost_estimate: DeploymentCostEstimate | None = None,
) -> str:
    """
    Generate HTML section for eval metrics to embed in reports.

    Args:
        eval_results: List of EvalResult objects.
        cost_estimate: Optional deployment cost estimate.

    Returns:
        HTML string for the eval metrics section.
    """
    if not eval_results and not cost_estimate:
        return ""

    html_parts = ['<section class="eval-metrics">']
    html_parts.append("<h2>Evaluation Metrics</h2>")

    # Metrics cards
    if eval_results:
        html_parts.append('<div class="metrics-cards">')
        for result in eval_results:
            if not result.metrics:
                continue

            # Find primary metric (first accuracy-type metric)
            primary = None
            for m in result.metrics:
                if m.higher_is_better and m.category in ("accuracy", ""):
                    primary = m
                    break
            if not primary and result.metrics:
                primary = result.metrics[0]

            if primary:
                html_parts.append(
                    f"""
                    <div class="card">
                        <div class="card-value">{primary.value:.1f}{primary.unit}</div>
                        <div class="card-label">{primary.name}</div>
                    </div>
                    """
                )

            # Show task type
            html_parts.append(
                f"""
                <div class="card">
                    <div class="card-value">{result.task_type}</div>
                    <div class="card-label">Task Type</div>
                </div>
                """
            )
        html_parts.append("</div>")

        # Detailed metrics table
        html_parts.append("<h3>All Metrics</h3>")
        html_parts.append("<table>")
        html_parts.append("<tr><th>Metric</th><th>Value</th><th>Category</th></tr>")
        for result in eval_results:
            for m in result.metrics:
                arrow = "↑" if m.higher_is_better else "↓"
                html_parts.append(
                    f"<tr><td>{m.name} {arrow}</td><td>{m.value:.4f}{m.unit}</td>"
                    f"<td>{m.category}</td></tr>"
                )
        html_parts.append("</table>")

    # Deployment cost section
    if cost_estimate:
        html_parts.append("<h3>Deployment Cost Estimate</h3>")
        html_parts.append('<div class="metrics-cards">')
        html_parts.append(
            f"""
            <div class="card">
                <div class="card-value">${cost_estimate.cost_per_month_usd:.0f}</div>
                <div class="card-label">$/Month</div>
            </div>
            <div class="card">
                <div class="card-value">${cost_estimate.cost_per_1k_inferences_usd:.4f}</div>
                <div class="card-label">$/1K Inferences</div>
            </div>
            <div class="card">
                <div class="card-value">{cost_estimate.hardware_tier.name}</div>
                <div class="card-label">Hardware</div>
            </div>
            <div class="card">
                <div class="card-value">{cost_estimate.estimated_latency_ms:.1f}ms</div>
                <div class="card-label">Latency</div>
            </div>
            """
        )
        html_parts.append("</div>")

        if cost_estimate.warnings:
            html_parts.append('<div class="warnings">')
            for warning in cost_estimate.warnings:
                html_parts.append(f"<p>⚠️ {warning}</p>")
            html_parts.append("</div>")

    html_parts.append("</section>")
    return "\n".join(html_parts)


def compare_models_from_paths(
    model_paths: list[str | Path],
    eval_paths: list[str | Path] | None = None,
    scenario: DeploymentScenario | None = None,
) -> ModelComparisonTable:
    """
    Compare models from file paths.

    Runs haoline analysis on each model and optionally imports eval results.

    Args:
        model_paths: List of paths to model files.
        eval_paths: Optional list of eval result files (matched by index).
        scenario: Deployment scenario for cost calculation.

    Returns:
        ModelComparisonTable with comparison data.
    """
    from .schemas import create_combined_report

    reports = []

    for i, model_path in enumerate(model_paths):
        # Import eval if available
        eval_results = None
        if eval_paths and i < len(eval_paths):
            from .adapters import detect_and_parse

            eval_result = detect_and_parse(Path(eval_paths[i]))
            if eval_result:
                eval_results = [eval_result]

        # Create combined report
        report = create_combined_report(
            str(model_path),
            eval_results=eval_results,
            run_inspection=True,
        )
        reports.append(report)

    return compare_models(reports, scenario)
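
Usage note (not part of the packaged file above): a minimal sketch of driving the comparison API added in this diff, assuming the wheel is installed as haoline and that imports follow the module layout in the file listing. The model and eval file paths are hypothetical placeholders, and DeploymentScenario.realtime_video(fps=30) mirrors the constructor used in the compare_models docstring example.

# Sketch only: file paths are hypothetical, not files shipped with the wheel.
from haoline.eval.comparison import compare_models_from_paths
from haoline.eval.deployment import DeploymentScenario

# Scenario taken from the compare_models docstring example.
scenario = DeploymentScenario.realtime_video(fps=30)

table = compare_models_from_paths(
    model_paths=["model_a.onnx", "model_b.onnx"],            # hypothetical model files
    eval_paths=["model_a_eval.json", "model_b_eval.json"],   # optional, matched by index
    scenario=scenario,
)

print(table.to_console())            # aligned text table for the terminal
table.save_csv("comparison.csv")     # same columns as to_csv()
table.save_json("comparison.json")   # includes scenario and per-row extra_metrics

For reports already loaded as CombinedReport objects, compare_models(reports, scenario) followed by to_markdown() or to_csv() covers the same flow.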