haoline-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haoline/.streamlit/config.toml +10 -0
- haoline/__init__.py +248 -0
- haoline/analyzer.py +935 -0
- haoline/cli.py +2712 -0
- haoline/compare.py +811 -0
- haoline/compare_visualizations.py +1564 -0
- haoline/edge_analysis.py +525 -0
- haoline/eval/__init__.py +131 -0
- haoline/eval/adapters.py +844 -0
- haoline/eval/cli.py +390 -0
- haoline/eval/comparison.py +542 -0
- haoline/eval/deployment.py +633 -0
- haoline/eval/schemas.py +833 -0
- haoline/examples/__init__.py +15 -0
- haoline/examples/basic_inspection.py +74 -0
- haoline/examples/compare_models.py +117 -0
- haoline/examples/hardware_estimation.py +78 -0
- haoline/format_adapters.py +1001 -0
- haoline/formats/__init__.py +123 -0
- haoline/formats/coreml.py +250 -0
- haoline/formats/gguf.py +483 -0
- haoline/formats/openvino.py +255 -0
- haoline/formats/safetensors.py +273 -0
- haoline/formats/tflite.py +369 -0
- haoline/hardware.py +2307 -0
- haoline/hierarchical_graph.py +462 -0
- haoline/html_export.py +1573 -0
- haoline/layer_summary.py +769 -0
- haoline/llm_summarizer.py +465 -0
- haoline/op_icons.py +618 -0
- haoline/operational_profiling.py +1492 -0
- haoline/patterns.py +1116 -0
- haoline/pdf_generator.py +265 -0
- haoline/privacy.py +250 -0
- haoline/pydantic_models.py +241 -0
- haoline/report.py +1923 -0
- haoline/report_sections.py +539 -0
- haoline/risks.py +521 -0
- haoline/schema.py +523 -0
- haoline/streamlit_app.py +2024 -0
- haoline/tests/__init__.py +4 -0
- haoline/tests/conftest.py +123 -0
- haoline/tests/test_analyzer.py +868 -0
- haoline/tests/test_compare_visualizations.py +293 -0
- haoline/tests/test_edge_analysis.py +243 -0
- haoline/tests/test_eval.py +604 -0
- haoline/tests/test_format_adapters.py +460 -0
- haoline/tests/test_hardware.py +237 -0
- haoline/tests/test_hardware_recommender.py +90 -0
- haoline/tests/test_hierarchical_graph.py +326 -0
- haoline/tests/test_html_export.py +180 -0
- haoline/tests/test_layer_summary.py +428 -0
- haoline/tests/test_llm_patterns.py +540 -0
- haoline/tests/test_llm_summarizer.py +339 -0
- haoline/tests/test_patterns.py +774 -0
- haoline/tests/test_pytorch.py +327 -0
- haoline/tests/test_report.py +383 -0
- haoline/tests/test_risks.py +398 -0
- haoline/tests/test_schema.py +417 -0
- haoline/tests/test_tensorflow.py +380 -0
- haoline/tests/test_visualizations.py +316 -0
- haoline/universal_ir.py +856 -0
- haoline/visualizations.py +1086 -0
- haoline/visualize_yolo.py +44 -0
- haoline/web.py +110 -0
- haoline-0.3.0.dist-info/METADATA +471 -0
- haoline-0.3.0.dist-info/RECORD +70 -0
- haoline-0.3.0.dist-info/WHEEL +4 -0
- haoline-0.3.0.dist-info/entry_points.txt +5 -0
- haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/edge_analysis.py
ADDED
@@ -0,0 +1,525 @@
# Copyright (c) 2025 HaoLine Contributors
# SPDX-License-Identifier: MIT

"""
Edge-Centric Analysis for graph visualization.

Task 5.6: Analyze tensor flow between nodes to identify bottlenecks,
memory hotspots, and data flow patterns.
"""

from __future__ import annotations

import logging
import math
from dataclasses import dataclass
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from .analyzer import GraphInfo


# Data type sizes in bytes
DTYPE_SIZES: dict[str, int] = {
    "float32": 4,
    "float": 4,
    "float16": 2,
    "half": 2,
    "bfloat16": 2,
    "float64": 8,
    "double": 8,
    "int64": 8,
    "int32": 4,
    "int16": 2,
    "int8": 1,
    "uint8": 1,
    "uint16": 2,
    "uint32": 4,
    "uint64": 8,
    "bool": 1,
    "string": 8,  # Pointer size estimate
    "complex64": 8,
    "complex128": 16,
}


@dataclass
class EdgeInfo:
    """Information about an edge (tensor) between nodes."""

    tensor_name: str
    source_node: str | None  # None if graph input
    target_nodes: list[str]  # Nodes consuming this tensor
    shape: list[int | str]  # Shape with possible symbolic dims
    dtype: str
    size_bytes: int  # Total size in bytes
    is_weight: bool  # True if initializer/constant
    precision: str  # fp32, fp16, int8, etc.

    # Analysis results
    is_bottleneck: bool = False
    is_skip_connection: bool = False
    is_attention_qk: bool = False  # Q @ K^T output (O(seq^2))
    memory_intensity: float = 0.0  # 0-1 scale

    def to_dict(self) -> dict:
        return {
            "tensor_name": self.tensor_name,
            "source_node": self.source_node,
            "target_nodes": self.target_nodes,
            "shape": self.shape,
            "dtype": self.dtype,
            "size_bytes": self.size_bytes,
            "is_weight": self.is_weight,
            "precision": self.precision,
            "is_bottleneck": self.is_bottleneck,
            "is_skip_connection": self.is_skip_connection,
            "is_attention_qk": self.is_attention_qk,
            "memory_intensity": self.memory_intensity,
        }


@dataclass
class EdgeAnalysisResult:
    """Complete edge analysis for a graph."""

    edges: list[EdgeInfo]
    total_activation_bytes: int
    peak_activation_bytes: int
    peak_activation_node: str | None
    bottleneck_edges: list[str]  # Tensor names of bottleneck edges
    attention_edges: list[str]  # O(seq^2) attention edges
    skip_connection_edges: list[str]

    # Memory profile along execution
    memory_profile: list[tuple[str, int]]  # (node_name, cumulative_memory)

    def to_dict(self) -> dict:
        return {
            "total_activation_bytes": self.total_activation_bytes,
            "peak_activation_bytes": self.peak_activation_bytes,
            "peak_activation_node": self.peak_activation_node,
            "bottleneck_edges": self.bottleneck_edges,
            "attention_edges": self.attention_edges,
            "skip_connection_edges": self.skip_connection_edges,
            "num_edges": len(self.edges),
        }


class EdgeAnalyzer:
    """
    Analyze edges (tensors) in the computation graph.

    Identifies memory bottlenecks, attention patterns, and skip connections.
    """

    def __init__(self, logger: logging.Logger | None = None):
        self.logger = logger or logging.getLogger("haoline.edges")

    def analyze(self, graph_info: GraphInfo) -> EdgeAnalysisResult:
        """
        Perform complete edge analysis on a graph.

        Args:
            graph_info: Parsed graph information from ONNXGraphLoader.

        Returns:
            EdgeAnalysisResult with all edge information and analysis.
        """
        edges = self._extract_edges(graph_info)
        self._analyze_bottlenecks(edges)
        self._detect_skip_connections(edges, graph_info)
        self._detect_attention_edges(edges, graph_info)

        # Calculate memory profile
        memory_profile = self._calculate_memory_profile(edges, graph_info)

        # Find peak
        peak_bytes = 0
        peak_node = None
        for node_name, mem in memory_profile:
            if mem > peak_bytes:
                peak_bytes = mem
                peak_node = node_name

        # Collect special edges
        bottleneck_edges = [e.tensor_name for e in edges if e.is_bottleneck]
        attention_edges = [e.tensor_name for e in edges if e.is_attention_qk]
        skip_edges = [e.tensor_name for e in edges if e.is_skip_connection]

        total_bytes = sum(e.size_bytes for e in edges if not e.is_weight)

        return EdgeAnalysisResult(
            edges=edges,
            total_activation_bytes=total_bytes,
            peak_activation_bytes=peak_bytes,
            peak_activation_node=peak_node,
            bottleneck_edges=bottleneck_edges,
            attention_edges=attention_edges,
            skip_connection_edges=skip_edges,
            memory_profile=memory_profile,
        )

    def _extract_edges(self, graph_info: GraphInfo) -> list[EdgeInfo]:
        """Extract all edges (tensors) from the graph."""
        edges: list[EdgeInfo] = []
        tensor_to_consumers: dict[str, list[str]] = {}

        # Build consumer map
        for node in graph_info.nodes:
            for inp in node.inputs:
                if inp not in tensor_to_consumers:
                    tensor_to_consumers[inp] = []
                tensor_to_consumers[inp].append(node.name)

        # Process all tensors
        processed: set[str] = set()

        # Graph inputs
        for name, shape in graph_info.input_shapes.items():
            if name in processed:
                continue
            processed.add(name)

            dtype = self._get_tensor_dtype(name, graph_info)
            size_bytes = self._calculate_tensor_bytes(shape, dtype)
            precision = self._dtype_to_precision(dtype)

            edges.append(
                EdgeInfo(
                    tensor_name=name,
                    source_node=None,
                    target_nodes=tensor_to_consumers.get(name, []),
                    shape=shape,
                    dtype=dtype,
                    size_bytes=size_bytes,
                    is_weight=False,
                    precision=precision,
                )
            )

        # Initializers (weights)
        for name, arr in graph_info.initializers.items():
            if name in processed:
                continue
            processed.add(name)

            shape = list(arr.shape)
            dtype = str(arr.dtype)
            size_bytes = arr.nbytes
            precision = self._dtype_to_precision(dtype)

            edges.append(
                EdgeInfo(
                    tensor_name=name,
                    source_node=None,
                    target_nodes=tensor_to_consumers.get(name, []),
                    shape=shape,
                    dtype=dtype,
                    size_bytes=size_bytes,
                    is_weight=True,
                    precision=precision,
                )
            )

        # Node outputs (activations)
        for node in graph_info.nodes:
            for output in node.outputs:
                if output in processed:
                    continue
                processed.add(output)

                shape = graph_info.value_shapes.get(output, [])
                dtype = self._get_tensor_dtype(output, graph_info)
                size_bytes = self._calculate_tensor_bytes(shape, dtype)
                precision = self._dtype_to_precision(dtype)

                edges.append(
                    EdgeInfo(
                        tensor_name=output,
                        source_node=node.name,
                        target_nodes=tensor_to_consumers.get(output, []),
                        shape=shape,
                        dtype=dtype,
                        size_bytes=size_bytes,
                        is_weight=False,
                        precision=precision,
                    )
                )

        return edges

    def _get_tensor_dtype(self, name: str, graph_info: GraphInfo) -> str:
        """Get dtype for a tensor."""
        # Check if it's an initializer
        if name in graph_info.initializers:
            return str(graph_info.initializers[name].dtype)

        # Default to float32 for activations
        return "float32"

    def _calculate_tensor_bytes(self, shape: list, dtype: str) -> int:
        """Calculate tensor size in bytes."""
        # Handle symbolic dimensions
        num_elements = 1
        for dim in shape:
            if isinstance(dim, int) and dim > 0:
                num_elements *= dim
            else:
                # Symbolic or unknown dim - estimate as 1 for now
                # (could use batch_size=1, seq_len=512 defaults)
                num_elements *= 1

        # Get dtype size
        dtype_lower = dtype.lower().replace("torch.", "").replace("numpy.", "")
        element_bytes = DTYPE_SIZES.get(dtype_lower, 4)  # Default to 4 bytes

        return num_elements * element_bytes

    def _dtype_to_precision(self, dtype: str) -> str:
        """Convert dtype string to precision category."""
        # Order matters: "bfloat16" contains "float16" and "uint8" contains
        # "int8", so the more specific substrings must be checked first.
        dtype_lower = dtype.lower()
        if "bfloat16" in dtype_lower:
            return "bf16"
        elif "float16" in dtype_lower or "half" in dtype_lower:
            return "fp16"
        elif "float32" in dtype_lower or dtype_lower == "float":
            return "fp32"
        elif "uint8" in dtype_lower:
            return "uint8"
        elif "int8" in dtype_lower:
            return "int8"
        elif "int4" in dtype_lower:
            return "int4"
        elif "int" in dtype_lower:
            return "int32"
        else:
            return "fp32"

    def _analyze_bottlenecks(self, edges: list[EdgeInfo]) -> None:
        """Mark edges that are memory bottlenecks."""
        # Find max activation size (excluding weights)
        activation_sizes = [e.size_bytes for e in edges if not e.is_weight]
        if not activation_sizes:
            return

        max_size = max(activation_sizes)

        for edge in edges:
            if edge.is_weight:
                continue

            edge.memory_intensity = edge.size_bytes / max_size if max_size > 0 else 0

            # Mark as bottleneck if within 20% of the largest activation
            if edge.size_bytes >= max_size * 0.8:
                edge.is_bottleneck = True

    def _detect_skip_connections(self, edges: list[EdgeInfo], graph_info: GraphInfo) -> None:
        """Detect skip connection edges."""
        # Skip connections typically:
        # 1. Go from an earlier node to a later Add node
        # 2. Bypass multiple nodes

        # Build node position map (topological order) and name lookup
        node_positions: dict[str, int] = {}
        nodes_by_name: dict[str, object] = {}
        for i, node in enumerate(graph_info.nodes):
            node_positions[node.name] = i
            nodes_by_name[node.name] = node

        for edge in edges:
            if edge.source_node is None:
                continue

            source_pos = node_positions.get(edge.source_node, 0)

            for target_name in edge.target_nodes:
                target_pos = node_positions.get(target_name, 0)

                # Check if target is an Add (residual connection point)
                target_node = nodes_by_name.get(target_name)

                if target_node and target_node.op_type == "Add":
                    # Check if there's a significant skip distance
                    if target_pos - source_pos >= 3:
                        edge.is_skip_connection = True
                        break

    def _detect_attention_edges(self, edges: list[EdgeInfo], graph_info: GraphInfo) -> None:
        """Detect O(seq^2) attention edges (Q @ K^T output)."""
        # Look for Softmax nodes and mark their input edges
        for node in graph_info.nodes:
            if node.op_type == "Softmax":
                # The input to Softmax in attention is Q @ K^T (shape: [batch, heads, seq, seq])
                for inp in node.inputs:
                    for edge in edges:
                        if edge.tensor_name == inp:
                            # Check if shape suggests attention: last two dims
                            # equal, whether concrete ints or matching symbolic
                            # names
                            shape = edge.shape
                            if len(shape) >= 2 and shape[-1] == shape[-2]:
                                edge.is_attention_qk = True

    def _calculate_memory_profile(
        self, edges: list[EdgeInfo], graph_info: GraphInfo
    ) -> list[tuple[str, int]]:
        """
        Calculate memory usage at each point in execution.

        Task 5.6.7: Calculate peak memory point in graph.

        Returns list of (node_name, cumulative_memory) tuples.
        """
        profile: list[tuple[str, int]] = []
        live_tensors: dict[str, int] = {}  # tensor_name -> size_bytes

        # Build tensor lifecycle info
        tensor_last_use: dict[str, str] = {}  # tensor -> last consuming node
        for node in graph_info.nodes:
            for inp in node.inputs:
                tensor_last_use[inp] = node.name

        # Simulate execution
        for node in graph_info.nodes:
            # Add outputs of this node
            for edge in edges:
                if edge.source_node == node.name:
                    live_tensors[edge.tensor_name] = edge.size_bytes

            # Calculate current memory
            current_mem = sum(live_tensors.values())
            profile.append((node.name, current_mem))

            # Free tensors whose last use is this node
            to_free = [t for t, last_node in tensor_last_use.items() if last_node == node.name]
            for tensor in to_free:
                live_tensors.pop(tensor, None)

        return profile

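# How to read the profile above (illustrative note): each step adds the
# node's outputs to the live set, records the running total, then frees
# tensors whose last consumer was that node - so peak_activation_node in
# analyze() is the point where the most activation bytes are live at once.
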
# Edge visualization helpers


def compute_edge_thickness(size_bytes: int, min_width: float = 1, max_width: float = 10) -> float:
    """
    Compute edge thickness based on tensor size.

    Task 5.6.2: Map edge thickness to tensor size.

    Uses a log scale to handle the huge range of tensor sizes.
    """
    if size_bytes <= 0:
        return min_width

    # Log scale: 1KB = min_width, 10GB = max_width
    log_size = math.log10(max(size_bytes, 1))
    log_min = 3  # 1KB
    log_max = 10  # 10GB

    t = (log_size - log_min) / (log_max - log_min)
    t = max(0, min(1, t))

    return min_width + t * (max_width - min_width)
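# Worked example (illustrative): a 1024 x 1024 float32 activation is 4 MiB,
# so log10(4 * 2**20) ~= 6.62 and t = (6.62 - 3) / 7 ~= 0.52, giving a
# thickness of roughly 1 + 0.52 * 9 ~= 5.7 on the default 1-10 scale.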


# Precision colors (Task 5.6.3)
PRECISION_EDGE_COLORS: dict[str, str] = {
    "fp32": "#4A90D9",  # Blue
    "fp16": "#2ECC71",  # Green
    "bf16": "#9B59B6",  # Purple
    "int8": "#F1C40F",  # Yellow
    "int4": "#E67E22",  # Orange
    "uint8": "#F39C12",  # Dark yellow
}


def get_edge_color(edge: EdgeInfo) -> str:
    """
    Get color for an edge based on its properties.

    Task 5.6.3: Color edges by precision.
    Task 5.6.4: Highlight memory bottleneck edges.
    Task 5.6.8: Highlight O(seq^2) attention edges.
    """
    # Priority: bottleneck > attention > skip > precision
    if edge.is_bottleneck:
        return "#E74C3C"  # Red for bottlenecks
    elif edge.is_attention_qk:
        return "#E67E22"  # Orange for O(seq^2) attention
    elif edge.is_skip_connection:
        return "#27AE60"  # Dark green for skip connections (dashed)
    else:
        return PRECISION_EDGE_COLORS.get(edge.precision, "#7F8C8D")


def get_edge_style(edge: EdgeInfo) -> str:
    """Get edge line style."""
    if edge.is_skip_connection:
        return "dashed"
    return "solid"


def format_tensor_shape(shape: list) -> str:
    """
    Format tensor shape for display.

    Task 5.6.5: Show tensor shape on hover.
    """
    if not shape:
        return "[]"

    # Concrete ints and symbolic dims both render via str()
    parts = [str(dim) for dim in shape]

    return f"[{', '.join(parts)}]"


def format_tensor_size(size_bytes: int) -> str:
    """Format tensor size for display."""
    if size_bytes < 1024:
        return f"{size_bytes} B"
    elif size_bytes < 1024 * 1024:
        return f"{size_bytes / 1024:.1f} KB"
    elif size_bytes < 1024 * 1024 * 1024:
        return f"{size_bytes / (1024 * 1024):.1f} MB"
    else:
        return f"{size_bytes / (1024 * 1024 * 1024):.2f} GB"


def generate_edge_tooltip(edge: EdgeInfo) -> str:
    """Generate tooltip text for an edge."""
    lines = [
        f"Tensor: {edge.tensor_name}",
        f"Shape: {format_tensor_shape(edge.shape)}",
        f"Size: {format_tensor_size(edge.size_bytes)}",
        f"Precision: {edge.precision}",
    ]

    if edge.is_bottleneck:
        lines.append("⚠️ Memory Bottleneck")
    if edge.is_attention_qk:
        lines.append("🔴 O(seq²) Attention")
    if edge.is_skip_connection:
        lines.append("⤴️ Skip Connection")

    return "\n".join(lines)
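
A minimal usage sketch for this module. The analyze() docstring confirms that GraphInfo comes from an ONNXGraphLoader in haoline.analyzer, but that loader's method names are not shown in this diff, so the load("model.onnx") call below is an assumption:

from haoline.analyzer import ONNXGraphLoader  # loader named in the analyze() docstring
from haoline.edge_analysis import EdgeAnalyzer, format_tensor_size, generate_edge_tooltip

graph_info = ONNXGraphLoader().load("model.onnx")  # assumed API; see analyzer.py
result = EdgeAnalyzer().analyze(graph_info)

print("Peak activation:", format_tensor_size(result.peak_activation_bytes),
      "at node", result.peak_activation_node)
for edge in result.edges:
    if edge.is_bottleneck:
        print(generate_edge_tooltip(edge))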
haoline/eval/__init__.py
ADDED
@@ -0,0 +1,131 @@
"""
HaoLine Eval Import Module

Import evaluation results from external tools and combine with architecture analysis.

Supported adapters:
- Ultralytics YOLO (detection)
- HuggingFace evaluate (classification/NLP)
- lm-eval-harness (LLM benchmarks)
- timm (image classification)
- Generic CSV/JSON
"""

from .adapters import (
    detect_and_parse,
    load_generic_csv,
    load_generic_json,
    load_hf_evaluate,
    load_lm_eval,
    load_timm_benchmark,
    load_ultralytics_json,
    parse_generic_csv,
    parse_generic_json,
    parse_hf_evaluate,
    parse_lm_eval,
    parse_timm_benchmark,
    parse_ultralytics_val,
)
from .comparison import (
    ModelComparisonRow,
    ModelComparisonTable,
    compare_models,
    compare_models_from_paths,
    generate_eval_metrics_html,
)
from .deployment import (
    HARDWARE_TIERS,
    CloudProvider,
    DeploymentCostEstimate,
    DeploymentScenario,
    DeploymentTarget,
    HardwareTier,
    calculate_deployment_cost,
    compare_deployment_costs,
    estimate_cost_from_combined_report,
    estimate_latency_from_flops,
    get_hardware_tier,
    list_hardware_tiers,
    select_hardware_tier_for_latency,
)
from .schemas import (
    ClassificationEvalResult,
    CombinedReport,
    DetectionEvalResult,
    EvalMetric,
    EvalResult,
    GenericEvalResult,
    LLMEvalResult,
    NLPEvalResult,
    SegmentationEvalResult,
    TaskType,
    compute_model_hash,
    create_combined_report,
    get_combined_report_schema,
    get_eval_schema,
    is_valid_task_type,
    link_eval_to_model,
    validate_eval_result,
)

__all__ = [
    # Schemas
    "EvalMetric",
    "EvalResult",
    "DetectionEvalResult",
    "ClassificationEvalResult",
    "NLPEvalResult",
    "LLMEvalResult",
    "SegmentationEvalResult",
    "GenericEvalResult",
    "CombinedReport",
    "TaskType",
    # Schema utilities
    "get_eval_schema",
    "get_combined_report_schema",
    "validate_eval_result",
    "is_valid_task_type",
    # Linking utilities
    "compute_model_hash",
    "link_eval_to_model",
    "create_combined_report",
    # Deployment cost
    "DeploymentScenario",
    "DeploymentTarget",
    "CloudProvider",
    "HardwareTier",
    "DeploymentCostEstimate",
    "HARDWARE_TIERS",
    "get_hardware_tier",
    "list_hardware_tiers",
    "calculate_deployment_cost",
    "compare_deployment_costs",
    "estimate_latency_from_flops",
    "select_hardware_tier_for_latency",
    "estimate_cost_from_combined_report",
    # Model comparison
    "ModelComparisonRow",
    "ModelComparisonTable",
    "compare_models",
    "compare_models_from_paths",
    "generate_eval_metrics_html",
    # Adapters - Ultralytics
    "parse_ultralytics_val",
    "load_ultralytics_json",
    # Adapters - HuggingFace evaluate
    "parse_hf_evaluate",
    "load_hf_evaluate",
    # Adapters - lm-eval-harness
    "parse_lm_eval",
    "load_lm_eval",
    # Adapters - timm
    "parse_timm_benchmark",
    "load_timm_benchmark",
    # Adapters - Generic
    "parse_generic_json",
    "load_generic_json",
    "parse_generic_csv",
    "load_generic_csv",
    # Auto-detect
    "detect_and_parse",
]
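
A hedged sketch of this import surface. Only the exported names are confirmed by this file; the call signatures and the metrics path are assumptions, with the real definitions in haoline/eval/adapters.py, schemas.py, and deployment.py:

from haoline.eval import detect_and_parse, validate_eval_result, list_hardware_tiers

# Assumed signatures - only the exported names are confirmed by __init__.py.
result = detect_and_parse("runs/val/metrics.json")  # hypothetical path; auto-detects the source tool
validate_eval_result(result)
print(list_hardware_tiers())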