haoline-0.3.0-py3-none-any.whl

Files changed (70)
  1. haoline/.streamlit/config.toml +10 -0
  2. haoline/__init__.py +248 -0
  3. haoline/analyzer.py +935 -0
  4. haoline/cli.py +2712 -0
  5. haoline/compare.py +811 -0
  6. haoline/compare_visualizations.py +1564 -0
  7. haoline/edge_analysis.py +525 -0
  8. haoline/eval/__init__.py +131 -0
  9. haoline/eval/adapters.py +844 -0
  10. haoline/eval/cli.py +390 -0
  11. haoline/eval/comparison.py +542 -0
  12. haoline/eval/deployment.py +633 -0
  13. haoline/eval/schemas.py +833 -0
  14. haoline/examples/__init__.py +15 -0
  15. haoline/examples/basic_inspection.py +74 -0
  16. haoline/examples/compare_models.py +117 -0
  17. haoline/examples/hardware_estimation.py +78 -0
  18. haoline/format_adapters.py +1001 -0
  19. haoline/formats/__init__.py +123 -0
  20. haoline/formats/coreml.py +250 -0
  21. haoline/formats/gguf.py +483 -0
  22. haoline/formats/openvino.py +255 -0
  23. haoline/formats/safetensors.py +273 -0
  24. haoline/formats/tflite.py +369 -0
  25. haoline/hardware.py +2307 -0
  26. haoline/hierarchical_graph.py +462 -0
  27. haoline/html_export.py +1573 -0
  28. haoline/layer_summary.py +769 -0
  29. haoline/llm_summarizer.py +465 -0
  30. haoline/op_icons.py +618 -0
  31. haoline/operational_profiling.py +1492 -0
  32. haoline/patterns.py +1116 -0
  33. haoline/pdf_generator.py +265 -0
  34. haoline/privacy.py +250 -0
  35. haoline/pydantic_models.py +241 -0
  36. haoline/report.py +1923 -0
  37. haoline/report_sections.py +539 -0
  38. haoline/risks.py +521 -0
  39. haoline/schema.py +523 -0
  40. haoline/streamlit_app.py +2024 -0
  41. haoline/tests/__init__.py +4 -0
  42. haoline/tests/conftest.py +123 -0
  43. haoline/tests/test_analyzer.py +868 -0
  44. haoline/tests/test_compare_visualizations.py +293 -0
  45. haoline/tests/test_edge_analysis.py +243 -0
  46. haoline/tests/test_eval.py +604 -0
  47. haoline/tests/test_format_adapters.py +460 -0
  48. haoline/tests/test_hardware.py +237 -0
  49. haoline/tests/test_hardware_recommender.py +90 -0
  50. haoline/tests/test_hierarchical_graph.py +326 -0
  51. haoline/tests/test_html_export.py +180 -0
  52. haoline/tests/test_layer_summary.py +428 -0
  53. haoline/tests/test_llm_patterns.py +540 -0
  54. haoline/tests/test_llm_summarizer.py +339 -0
  55. haoline/tests/test_patterns.py +774 -0
  56. haoline/tests/test_pytorch.py +327 -0
  57. haoline/tests/test_report.py +383 -0
  58. haoline/tests/test_risks.py +398 -0
  59. haoline/tests/test_schema.py +417 -0
  60. haoline/tests/test_tensorflow.py +380 -0
  61. haoline/tests/test_visualizations.py +316 -0
  62. haoline/universal_ir.py +856 -0
  63. haoline/visualizations.py +1086 -0
  64. haoline/visualize_yolo.py +44 -0
  65. haoline/web.py +110 -0
  66. haoline-0.3.0.dist-info/METADATA +471 -0
  67. haoline-0.3.0.dist-info/RECORD +70 -0
  68. haoline-0.3.0.dist-info/WHEEL +4 -0
  69. haoline-0.3.0.dist-info/entry_points.txt +5 -0
  70. haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/eval/schemas.py
@@ -0,0 +1,833 @@
+ """
+ Eval Result Schemas (Pydantic v2)
+
+ Task-agnostic and task-specific schemas for importing evaluation results
+ from external tools like Ultralytics, HuggingFace evaluate, lm-eval, etc.
+
+ All schemas use Pydantic for validation, serialization, and JSON Schema generation.
+ """
+
+ from __future__ import annotations
+
+ from datetime import datetime
+ from enum import Enum
+ from typing import Annotated, Any
+
+ from pydantic import BaseModel, Field
+
+
+ class TaskType(str, Enum):
+     """Supported evaluation task types."""
+
+     detection = "detection"
+     classification = "classification"
+     nlp = "nlp"
+     llm = "llm"
+     segmentation = "segmentation"
+     generic = "generic"
+
+
+ class EvalMetric(BaseModel):
+     """A single evaluation metric."""
+
+     name: Annotated[str, Field(description="Metric name, e.g., 'mAP@50', 'top1_accuracy'")]
+     value: Annotated[float, Field(description="The metric value")]
+     unit: Annotated[str, Field(default="", description="Unit, e.g., '%', 'ms', '' (dimensionless)")]
+     higher_is_better: Annotated[
+         bool, Field(default=True, description="Whether higher values are better")
+     ]
+     category: Annotated[
+         str, Field(default="", description="Category, e.g., 'accuracy', 'speed', 'size'")
+     ]
+
+
+ class EvalResult(BaseModel):
+     """
+     Base class for evaluation results.
+
+     Task-agnostic fields that all eval results share.
+     """
+
+     model_id: Annotated[str, Field(description="Identifier for the model (path, name, or hash)")]
+     task_type: Annotated[str, Field(description="Task type: detection, classification, etc.")]
+     timestamp: Annotated[str, Field(default="", description="ISO format timestamp of eval run")] = (
+         ""
+     )
+     dataset: Annotated[str, Field(default="", description="Dataset used for evaluation")] = ""
+     metrics: Annotated[
+         list[EvalMetric], Field(default_factory=list, description="List of evaluation metrics")
+     ]
+     metadata: Annotated[
+         dict[str, Any], Field(default_factory=dict, description="Tool-specific extras")
+     ]
+
+     def model_post_init(self, __context: Any) -> None:
+         """Set timestamp if not provided."""
+         if not self.timestamp:
+             object.__setattr__(self, "timestamp", datetime.now().isoformat())
+
+     def get_metric(self, name: str) -> EvalMetric | None:
+         """Get a metric by name."""
+         for m in self.metrics:
+             if m.name == name:
+                 return m
+         return None
+
+     def get_metric_value(self, name: str, default: float = 0.0) -> float:
+         """Get a metric value by name, with default."""
+         m = self.get_metric(name)
+         return m.value if m else default
+
+     def to_json(self, indent: int = 2) -> str:
+         """Serialize to JSON string."""
+         result: str = self.model_dump_json(indent=indent)
+         return result
+
+     @classmethod
+     def from_json(cls, json_str: str) -> EvalResult:
+         """Deserialize from JSON string."""
+         result: EvalResult = cls.model_validate_json(json_str)
+         return result
+
+
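To make the base schema concrete, here is a minimal usage sketch. It is an editorial illustration rather than part of the packaged file; it assumes the module is importable as haoline.eval.schemas (matching the file path above) and uses made-up metric values.

from haoline.eval.schemas import EvalMetric, EvalResult

result = EvalResult(
    model_id="resnet50",
    task_type="classification",
    dataset="imagenet-val",
    metrics=[EvalMetric(name="top1_accuracy", value=76.1, unit="%", category="accuracy")],
)
print(result.get_metric_value("top1_accuracy"))    # 76.1
restored = EvalResult.from_json(result.to_json())  # JSON round trip via Pydantic
assert restored.model_id == "resnet50"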
+ # =============================================================================
+ # Task-Specific Schemas
+ # =============================================================================
+
+
+ class DetectionEvalResult(EvalResult):
+     """
+     Object detection evaluation results.
+
+     Standard metrics: mAP@50, mAP@50:95, precision, recall, F1 per class.
+     Compatible with: Ultralytics YOLO, Detectron2, MMDetection
+     """
+
+     task_type: str = "detection"
+
+     # Per-class metrics
+     class_metrics: Annotated[
+         dict[str, dict[str, float]],
+         Field(
+             default_factory=dict,
+             description="Per-class metrics, e.g., {'person': {'precision': 0.92}}",
+         ),
+     ]
+
+     # IoU thresholds used
+     iou_thresholds: Annotated[
+         list[float], Field(default_factory=lambda: [0.5, 0.75], description="IoU thresholds")
+     ]
+
+     # Confidence threshold
+     confidence_threshold: Annotated[float, Field(default=0.5, description="Confidence threshold")]
+
+     @classmethod
+     def create(
+         cls,
+         model_id: str,
+         dataset: str,
+         map50: float,
+         map50_95: float,
+         precision: float,
+         recall: float,
+         f1: float,
+         class_metrics: dict[str, dict[str, float]] | None = None,
+         **kwargs: Any,
+     ) -> DetectionEvalResult:
+         """Convenience constructor with standard detection metrics."""
+         metrics = [
+             EvalMetric(
+                 name="mAP@50", value=map50, unit="%", higher_is_better=True, category="accuracy"
+             ),
+             EvalMetric(
+                 name="mAP@50:95",
+                 value=map50_95,
+                 unit="%",
+                 higher_is_better=True,
+                 category="accuracy",
+             ),
+             EvalMetric(
+                 name="precision",
+                 value=precision,
+                 unit="%",
+                 higher_is_better=True,
+                 category="accuracy",
+             ),
+             EvalMetric(
+                 name="recall", value=recall, unit="%", higher_is_better=True, category="accuracy"
+             ),
+             EvalMetric(name="f1", value=f1, unit="%", higher_is_better=True, category="accuracy"),
+         ]
+         return cls(
+             model_id=model_id,
+             dataset=dataset,
+             metrics=metrics,
+             class_metrics=class_metrics or {},
+             **kwargs,
+         )
+
+
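A hedged sketch of the detection convenience constructor, again illustrative only and with invented numbers; per the unit="%" fields, scores are expected in percent.

from haoline.eval.schemas import DetectionEvalResult

detection = DetectionEvalResult.create(
    model_id="yolov8n",
    dataset="coco128",
    map50=52.3,
    map50_95=37.1,
    precision=61.0,
    recall=49.5,
    f1=54.7,
    class_metrics={"person": {"precision": 0.92, "recall": 0.88}},
)
print(detection.task_type)                      # "detection"
print(detection.get_metric_value("mAP@50:95"))  # 37.1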
+ class ClassificationEvalResult(EvalResult):
+     """
+     Image/text classification evaluation results.
+
+     Standard metrics: top-1 accuracy, top-5 accuracy, per-class accuracy.
+     Compatible with: timm, torchvision, HuggingFace
+     """
+
+     task_type: str = "classification"
+
+     # Per-class accuracy
+     class_accuracy: Annotated[
+         dict[str, float],
+         Field(default_factory=dict, description="Per-class accuracy"),
+     ]
+
+     # Confusion matrix (optional)
+     confusion_matrix: Annotated[
+         list[list[int]] | None,
+         Field(default=None, description="Confusion matrix"),
+     ]
+     class_names: Annotated[
+         list[str], Field(default_factory=list, description="Class names for confusion matrix")
+     ]
+
+     @classmethod
+     def create(
+         cls,
+         model_id: str,
+         dataset: str,
+         top1_accuracy: float,
+         top5_accuracy: float,
+         class_accuracy: dict[str, float] | None = None,
+         **kwargs: Any,
+     ) -> ClassificationEvalResult:
+         """Convenience constructor with standard classification metrics."""
+         metrics = [
+             EvalMetric(
+                 name="top1_accuracy",
+                 value=top1_accuracy,
+                 unit="%",
+                 higher_is_better=True,
+                 category="accuracy",
+             ),
+             EvalMetric(
+                 name="top5_accuracy",
+                 value=top5_accuracy,
+                 unit="%",
+                 higher_is_better=True,
+                 category="accuracy",
+             ),
+         ]
+         return cls(
+             model_id=model_id,
+             dataset=dataset,
+             metrics=metrics,
+             class_accuracy=class_accuracy or {},
+             **kwargs,
+         )
+
+
+ class NLPEvalResult(EvalResult):
+     """
+     NLP task evaluation results.
+
+     Standard metrics: accuracy, F1, exact match, BLEU, ROUGE.
+     Compatible with: HuggingFace evaluate, SacreBLEU
+     """
+
+     task_type: str = "nlp"
+
+     # Task subtype
+     nlp_task: Annotated[
+         str,
+         Field(
+             default="",
+             description="NLP task: classification, ner, qa, translation, summarization",
+         ),
+     ] = ""
+
+     @classmethod
+     def create(
+         cls,
+         model_id: str,
+         dataset: str,
+         nlp_task: str,
+         accuracy: float | None = None,
+         f1: float | None = None,
+         exact_match: float | None = None,
+         bleu: float | None = None,
+         rouge_l: float | None = None,
+         **kwargs: Any,
+     ) -> NLPEvalResult:
+         """Convenience constructor with standard NLP metrics."""
+         metrics = []
+         if accuracy is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="accuracy",
+                     value=accuracy,
+                     unit="%",
+                     higher_is_better=True,
+                     category="accuracy",
+                 )
+             )
+         if f1 is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="f1", value=f1, unit="%", higher_is_better=True, category="accuracy"
+                 )
+             )
+         if exact_match is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="exact_match",
+                     value=exact_match,
+                     unit="%",
+                     higher_is_better=True,
+                     category="accuracy",
+                 )
+             )
+         if bleu is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="bleu", value=bleu, unit="", higher_is_better=True, category="accuracy"
+                 )
+             )
+         if rouge_l is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="rouge_l",
+                     value=rouge_l,
+                     unit="",
+                     higher_is_better=True,
+                     category="accuracy",
+                 )
+             )
+
+         return cls(
+             model_id=model_id,
+             dataset=dataset,
+             metrics=metrics,
+             nlp_task=nlp_task,
+             **kwargs,
+         )
+
+
+ class LLMEvalResult(EvalResult):
+     """
+     Large Language Model evaluation results.
+
+     Standard metrics: perplexity, MMLU, HellaSwag, TruthfulQA, etc.
+     Compatible with: lm-eval-harness, EleutherAI eval
+     """
+
+     task_type: str = "llm"
+
+     # Benchmark scores (0-100 or 0-1 depending on benchmark)
+     benchmark_scores: Annotated[
+         dict[str, float],
+         Field(
+             default_factory=dict,
+             description="Benchmark scores, e.g., {'mmlu': 0.72, 'hellaswag': 0.81}",
+         ),
+     ]
+
+     @classmethod
+     def create(
+         cls,
+         model_id: str,
+         perplexity: float | None = None,
+         mmlu: float | None = None,
+         hellaswag: float | None = None,
+         truthfulqa: float | None = None,
+         arc_challenge: float | None = None,
+         winogrande: float | None = None,
+         **kwargs: Any,
+     ) -> LLMEvalResult:
+         """Convenience constructor with standard LLM benchmarks."""
+         metrics = []
+         benchmark_scores = {}
+
+         if perplexity is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="perplexity",
+                     value=perplexity,
+                     unit="",
+                     higher_is_better=False,
+                     category="accuracy",
+                 )
+             )
+
+         benchmarks = {
+             "mmlu": mmlu,
+             "hellaswag": hellaswag,
+             "truthfulqa": truthfulqa,
+             "arc_challenge": arc_challenge,
+             "winogrande": winogrande,
+         }
+
+         for name, value in benchmarks.items():
+             if value is not None:
+                 metrics.append(
+                     EvalMetric(
+                         name=name, value=value, unit="%", higher_is_better=True, category="accuracy"
+                     )
+                 )
+                 benchmark_scores[name] = value
+
+         return cls(
+             model_id=model_id,
+             dataset="multiple",
+             metrics=metrics,
+             benchmark_scores=benchmark_scores,
+             **kwargs,
+         )
+
+
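An illustrative sketch of the LLM constructor (not part of the wheel, values invented): perplexity is recorded with higher_is_better=False, named benchmarks also land in benchmark_scores, and dataset is set to "multiple".

from haoline.eval.schemas import LLMEvalResult

llm_eval = LLMEvalResult.create(
    model_id="tinyllama-1.1b",
    perplexity=7.8,   # lower is better, stored accordingly
    mmlu=25.4,
    hellaswag=59.2,
)
print(llm_eval.dataset)            # "multiple"
print(llm_eval.benchmark_scores)   # {'mmlu': 25.4, 'hellaswag': 59.2}
print(llm_eval.get_metric("perplexity").higher_is_better)  # False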
+ class SegmentationEvalResult(EvalResult):
+     """
+     Semantic/instance segmentation evaluation results.
+
+     Standard metrics: mIoU, dice coefficient, per-class IoU.
+     Compatible with: MMSegmentation, Detectron2
+     """
+
+     task_type: str = "segmentation"
+
+     # Per-class IoU
+     class_iou: Annotated[
+         dict[str, float],
+         Field(default_factory=dict, description="Per-class IoU values"),
+     ]
+
+     # Segmentation type
+     segmentation_type: Annotated[
+         str,
+         Field(default="semantic", description="Type: semantic, instance, or panoptic"),
+     ] = "semantic"
+
+     @classmethod
+     def create(
+         cls,
+         model_id: str,
+         dataset: str,
+         miou: float,
+         dice: float | None = None,
+         class_iou: dict[str, float] | None = None,
+         segmentation_type: str = "semantic",
+         **kwargs: Any,
+     ) -> SegmentationEvalResult:
+         """Convenience constructor with standard segmentation metrics."""
+         metrics = [
+             EvalMetric(
+                 name="mIoU", value=miou, unit="%", higher_is_better=True, category="accuracy"
+             ),
+         ]
+         if dice is not None:
+             metrics.append(
+                 EvalMetric(
+                     name="dice", value=dice, unit="%", higher_is_better=True, category="accuracy"
+                 )
+             )
+
+         return cls(
+             model_id=model_id,
+             dataset=dataset,
+             metrics=metrics,
+             class_iou=class_iou or {},
+             segmentation_type=segmentation_type,
+             **kwargs,
+         )
+
+
+ class GenericEvalResult(EvalResult):
+     """
+     Generic evaluation results with user-defined metrics.
+
+     Use this when no task-specific schema fits, or for custom evaluation tasks.
+     """
+
+     task_type: str = "generic"
+
+     # User can specify what metrics mean
+     metric_definitions: Annotated[
+         dict[str, str],
+         Field(
+             default_factory=dict,
+             description="Metric definitions, e.g., {'custom_score': 'Higher is better'}",
+         ),
+     ]
+
+     @classmethod
+     def create(
+         cls,
+         model_id: str,
+         dataset: str = "",
+         metrics: dict[str, float] | None = None,
+         metric_definitions: dict[str, str] | None = None,
+         higher_is_better: dict[str, bool] | None = None,
+         **kwargs: Any,
+     ) -> GenericEvalResult:
+         """Convenience constructor for generic metrics."""
+         metric_list = []
+         higher_map = higher_is_better or {}
+
+         for name, value in (metrics or {}).items():
+             metric_list.append(
+                 EvalMetric(
+                     name=name,
+                     value=value,
+                     unit="",
+                     higher_is_better=higher_map.get(name, True),
+                     category="custom",
+                 )
+             )
+
+         return cls(
+             model_id=model_id,
+             dataset=dataset,
+             metrics=metric_list,
+             metric_definitions=metric_definitions or {},
+             **kwargs,
+         )
+
+
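A sketch of the generic fallback schema, illustrative only and with hypothetical metric names: the higher_is_better map overrides the per-metric default of True.

from haoline.eval.schemas import GenericEvalResult

generic = GenericEvalResult.create(
    model_id="my-model",
    dataset="internal-benchmark",
    metrics={"quality_score": 8.4, "avg_latency_ms": 12.3},
    higher_is_better={"avg_latency_ms": False},  # names not listed default to True
    metric_definitions={"quality_score": "Rubric score, 0-10"},
)
print(generic.get_metric("avg_latency_ms").higher_is_better)  # False
print(generic.get_metric("quality_score").category)           # "custom"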
+ # =============================================================================
+ # Combined Report (Architecture + Eval)
+ # =============================================================================
+
+
+ class CombinedReport(BaseModel):
+     """
+     Combines architecture analysis with evaluation results.
+
+     Links an InspectionReport (model structure, FLOPs, params) with
+     EvalResult (accuracy, speed benchmarks) for unified comparison.
+     """
+
+     model_id: Annotated[str, Field(description="Model identifier")]
+     model_path: Annotated[str, Field(default="", description="Path to model file")]
+
+     # Architecture analysis (from haoline inspect)
+     architecture: Annotated[
+         dict[str, Any],
+         Field(
+             default_factory=dict,
+             description="Architecture summary: params_total, flops_total, etc.",
+         ),
+     ]
+
+     # Evaluation results (from external tools)
+     eval_results: Annotated[
+         list[EvalResult],
+         Field(default_factory=list, description="Evaluation results from external tools"),
+     ]
+
+     # Computed summaries
+     primary_accuracy_metric: Annotated[
+         str, Field(default="", description="Primary accuracy metric name")
+     ] = ""
+     primary_accuracy_value: Annotated[
+         float, Field(default=0.0, description="Primary accuracy metric value")
+     ] = 0.0
+
+     # Hardware estimates (from haoline)
+     hardware_profile: Annotated[str, Field(default="", description="Hardware profile name")] = ""
+     latency_ms: Annotated[float, Field(default=0.0, description="Latency in milliseconds")] = 0.0
+     throughput_fps: Annotated[
+         float, Field(default=0.0, description="Throughput in frames per second")
+     ] = 0.0
+
+     # Deployment cost (if calculated)
+     cost_per_day_usd: Annotated[
+         float, Field(default=0.0, description="Estimated cost per day in USD")
+     ] = 0.0
+     cost_per_month_usd: Annotated[
+         float, Field(default=0.0, description="Estimated cost per month in USD")
+     ] = 0.0
+
+     def add_eval_result(self, result: EvalResult) -> None:
+         """Add an evaluation result."""
+         self.eval_results.append(result)
+
+     def get_eval_by_task(self, task_type: str) -> EvalResult | None:
+         """Get eval result by task type."""
+         for r in self.eval_results:
+             if r.task_type == task_type:
+                 return r
+         return None
+
+     def get_all_metrics(self) -> list[EvalMetric]:
+         """Get all metrics from all eval results."""
+         metrics = []
+         for r in self.eval_results:
+             metrics.extend(r.metrics)
+         return metrics
+
+     def to_json(self, indent: int = 2) -> str:
+         """Serialize to JSON string."""
+         result: str = self.model_dump_json(indent=indent)
+         return result
+
+     @classmethod
+     def from_inspection_report(
+         cls,
+         report: Any,  # InspectionReport
+         model_path: str = "",
+         eval_results: list[EvalResult] | None = None,
+     ) -> CombinedReport:
+         """
+         Create from an InspectionReport.
+
+         Args:
+             report: InspectionReport from haoline.
+             model_path: Path to the model file.
+             eval_results: Optional list of eval results to attach.
+         """
+         from pathlib import Path
+
+         # Extract key architecture metrics
+         mem_bytes = 0
+         if report.memory_estimates:
+             mem_bytes = (
+                 report.memory_estimates.model_size_bytes
+                 + report.memory_estimates.peak_activation_bytes
+             )
+
+         arch_summary = {
+             "params_total": (report.param_counts.total if report.param_counts else 0),
+             "flops_total": (report.flop_counts.total if report.flop_counts else 0),
+             "memory_bytes": mem_bytes,
+             "model_size_bytes": (
+                 report.memory_estimates.model_size_bytes if report.memory_estimates else 0
+             ),
+             "peak_activation_bytes": (
+                 report.memory_estimates.peak_activation_bytes if report.memory_estimates else 0
+             ),
+             "architecture_type": report.architecture_type,
+             "num_nodes": (report.graph_summary.num_nodes if report.graph_summary else 0),
+         }
+
+         # Hardware estimates if available
+         hw_profile = ""
+         latency = 0.0
+         throughput = 0.0
+         if report.hardware_estimates:
+             hw_profile = report.hardware_profile.name if report.hardware_profile else ""
+             latency = getattr(report.hardware_estimates, "latency_ms", 0.0)
+             throughput = getattr(report.hardware_estimates, "throughput_samples_per_sec", 0.0)
+
+         # Model ID: use filename stem or path
+         model_id = ""
+         if model_path:
+             model_id = Path(model_path).stem
+         elif report.metadata:
+             model_id = Path(report.metadata.path).stem if report.metadata.path else ""
+
+         # Set primary accuracy from first eval result
+         primary_metric = ""
+         primary_value = 0.0
+         evals = eval_results or []
+         if evals and evals[0].metrics:
+             # Use first accuracy-type metric as primary
+             for m in evals[0].metrics:
+                 if m.higher_is_better and m.category in ("accuracy", ""):
+                     primary_metric = m.name
+                     primary_value = m.value
+                     break
+
+         return cls(
+             model_id=model_id,
+             model_path=model_path or (report.metadata.path if report.metadata else ""),
+             architecture=arch_summary,
+             eval_results=evals,
+             primary_accuracy_metric=primary_metric,
+             primary_accuracy_value=primary_value,
+             hardware_profile=hw_profile,
+             latency_ms=latency,
+             throughput_fps=throughput,
+         )
+
+
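An illustrative sketch of assembling a CombinedReport by hand (the from_inspection_report path needs a real InspectionReport); the model name, parameter count, and FLOP count here are invented, with the architecture keys mirroring the summary built above.

from haoline.eval.schemas import CombinedReport, DetectionEvalResult

det = DetectionEvalResult.create(
    model_id="yolov8n", dataset="coco128",
    map50=52.3, map50_95=37.1, precision=61.0, recall=49.5, f1=54.7,
)
combined = CombinedReport(
    model_id="yolov8n",
    model_path="yolov8n.onnx",
    architecture={"params_total": 3_151_904, "flops_total": 8_700_000_000},  # made-up numbers
    eval_results=[det],
)
found = combined.get_eval_by_task("detection")
print(found.get_metric_value("mAP@50") if found else None)  # 52.3
print(len(combined.get_all_metrics()))                      # 5 metrics pooled across results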
+ # =============================================================================
+ # Model Linking Utilities (Task 12.4.1)
+ # =============================================================================
+
+
+ def compute_model_hash(model_path: str, algorithm: str = "sha256") -> str:
+     """
+     Compute a hash of a model file for unique identification.
+
+     Args:
+         model_path: Path to the model file.
+         algorithm: Hash algorithm ("sha256", "md5", "sha1").
+
+     Returns:
+         Hex digest of the file hash.
+
+     Example:
+         >>> hash_id = compute_model_hash("model.onnx")
+         >>> print(hash_id[:12])  # First 12 chars as short ID
+         'a1b2c3d4e5f6'
+     """
+     import hashlib
+     from pathlib import Path
+
+     path = Path(model_path)
+     if not path.exists():
+         raise FileNotFoundError(f"Model file not found: {model_path}")
+
+     hash_func = hashlib.new(algorithm)
+
+     # Read in chunks to handle large files
+     with open(path, "rb") as f:
+         for chunk in iter(lambda: f.read(8192), b""):
+             hash_func.update(chunk)
+
+     return hash_func.hexdigest()
+
+
+ def link_eval_to_model(
+     model_path: str,
+     eval_result: EvalResult,
+     use_hash: bool = False,
+ ) -> EvalResult:
+     """
+     Link an evaluation result to a model file.
+
+     Updates the eval_result's model_id to match the model file identifier
+     (either path or hash).
+
+     Args:
+         model_path: Path to the model file.
+         eval_result: EvalResult to link.
+         use_hash: If True, use file hash as model_id. If False, use filename.
+
+     Returns:
+         Updated EvalResult with linked model_id.
+
+     Example:
+         >>> eval_result = parse_ultralytics_val(data)
+         >>> linked = link_eval_to_model("yolov8n.onnx", eval_result)
+         >>> print(linked.model_id)  # 'yolov8n'
+     """
+     from pathlib import Path
+
+     if use_hash:
+         model_id = compute_model_hash(model_path)[:12]  # Short hash
+     else:
+         model_id = Path(model_path).stem
+
+     # Update the eval result's model_id
+     eval_result.model_id = model_id
+     eval_result.metadata["linked_model_path"] = model_path
+
+     return eval_result
+
+
+ def create_combined_report(
+     model_path: str,
+     eval_results: list[EvalResult] | None = None,
+     inspection_report: Any = None,  # InspectionReport
+     run_inspection: bool = True,
+ ) -> CombinedReport:
+     """
+     Create a CombinedReport by linking model analysis with eval results.
+
+     If inspection_report is not provided and run_inspection is True,
+     runs haoline analysis on the model first.
+
+     Args:
+         model_path: Path to the model file.
+         eval_results: List of evaluation results to attach.
+         inspection_report: Pre-computed InspectionReport (optional).
+         run_inspection: Whether to run inspection if not provided.
+
+     Returns:
+         CombinedReport combining architecture analysis and eval metrics.
+
+     Example:
+         >>> # Import eval, then combine with architecture analysis
+         >>> eval_result = load_ultralytics_json("val_results.json")
+         >>> combined = create_combined_report("yolov8n.onnx", [eval_result])
+         >>> print(combined.architecture["params_total"])
+         >>> print(combined.eval_results[0].metrics[0].value)
+     """
+     from pathlib import Path
+
+     # Run inspection if needed
+     if inspection_report is None and run_inspection:
+         try:
+             from haoline.report import ModelInspector
+
+             inspector = ModelInspector()
+             inspection_report = inspector.inspect(Path(model_path))
+         except Exception as e:
+             # Can't import or run haoline - create minimal combined report
+             print(f"Warning: Could not run model inspection: {e}")
+             return CombinedReport(
+                 model_id=Path(model_path).stem,
+                 model_path=model_path,
+                 architecture={},
+                 eval_results=eval_results or [],
+             )
+
+     # Link eval results to model
+     linked_evals: list[EvalResult] = []
+     if eval_results:
+         for er in eval_results:
+             linked = link_eval_to_model(model_path, er)
+             linked_evals.append(linked)
+
+     # Create combined report
+     if inspection_report:
+         return CombinedReport.from_inspection_report(
+             inspection_report,
+             model_path=model_path,
+             eval_results=linked_evals,
+         )
+     else:
+         return CombinedReport(
+             model_id=Path(model_path).stem,
+             model_path=model_path,
+             architecture={},
+             eval_results=linked_evals,
+         )
+
+
+ # =============================================================================
+ # Schema Generation and Validation
+ # =============================================================================
+
+
+ def get_eval_schema() -> dict[str, Any]:
+     """Get JSON Schema for EvalResult."""
+     schema: dict[str, Any] = EvalResult.model_json_schema()
+     return schema
+
+
+ def get_combined_report_schema() -> dict[str, Any]:
+     """Get JSON Schema for CombinedReport."""
+     schema: dict[str, Any] = CombinedReport.model_json_schema()
+     return schema
+
+
+ def validate_eval_result(data: dict[str, Any]) -> bool:
+     """
+     Validate eval result data using Pydantic.
+
+     Returns True if valid, False otherwise.
+     """
+     try:
+         EvalResult.model_validate(data)
+         return True
+     except Exception:
+         return False
+
+
+ def is_valid_task_type(task_type: str) -> bool:
+     """Check if a task type is valid."""
+     return task_type in [t.value for t in TaskType]
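Finally, an illustrative sketch of the validation helpers; the payload is hypothetical, and only model_id and task_type are required since every other field on EvalResult has a default.

from haoline.eval.schemas import get_eval_schema, is_valid_task_type, validate_eval_result

payload = {"model_id": "resnet50", "task_type": "classification"}
print(validate_eval_result(payload))                          # True
print(validate_eval_result({"task_type": "classification"}))  # False, model_id missing
print(is_valid_task_type("detection"))                        # True
print("model_id" in get_eval_schema()["properties"])          # True (generated JSON Schema)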