haoline 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haoline/.streamlit/config.toml +10 -0
- haoline/__init__.py +248 -0
- haoline/analyzer.py +935 -0
- haoline/cli.py +2712 -0
- haoline/compare.py +811 -0
- haoline/compare_visualizations.py +1564 -0
- haoline/edge_analysis.py +525 -0
- haoline/eval/__init__.py +131 -0
- haoline/eval/adapters.py +844 -0
- haoline/eval/cli.py +390 -0
- haoline/eval/comparison.py +542 -0
- haoline/eval/deployment.py +633 -0
- haoline/eval/schemas.py +833 -0
- haoline/examples/__init__.py +15 -0
- haoline/examples/basic_inspection.py +74 -0
- haoline/examples/compare_models.py +117 -0
- haoline/examples/hardware_estimation.py +78 -0
- haoline/format_adapters.py +1001 -0
- haoline/formats/__init__.py +123 -0
- haoline/formats/coreml.py +250 -0
- haoline/formats/gguf.py +483 -0
- haoline/formats/openvino.py +255 -0
- haoline/formats/safetensors.py +273 -0
- haoline/formats/tflite.py +369 -0
- haoline/hardware.py +2307 -0
- haoline/hierarchical_graph.py +462 -0
- haoline/html_export.py +1573 -0
- haoline/layer_summary.py +769 -0
- haoline/llm_summarizer.py +465 -0
- haoline/op_icons.py +618 -0
- haoline/operational_profiling.py +1492 -0
- haoline/patterns.py +1116 -0
- haoline/pdf_generator.py +265 -0
- haoline/privacy.py +250 -0
- haoline/pydantic_models.py +241 -0
- haoline/report.py +1923 -0
- haoline/report_sections.py +539 -0
- haoline/risks.py +521 -0
- haoline/schema.py +523 -0
- haoline/streamlit_app.py +2024 -0
- haoline/tests/__init__.py +4 -0
- haoline/tests/conftest.py +123 -0
- haoline/tests/test_analyzer.py +868 -0
- haoline/tests/test_compare_visualizations.py +293 -0
- haoline/tests/test_edge_analysis.py +243 -0
- haoline/tests/test_eval.py +604 -0
- haoline/tests/test_format_adapters.py +460 -0
- haoline/tests/test_hardware.py +237 -0
- haoline/tests/test_hardware_recommender.py +90 -0
- haoline/tests/test_hierarchical_graph.py +326 -0
- haoline/tests/test_html_export.py +180 -0
- haoline/tests/test_layer_summary.py +428 -0
- haoline/tests/test_llm_patterns.py +540 -0
- haoline/tests/test_llm_summarizer.py +339 -0
- haoline/tests/test_patterns.py +774 -0
- haoline/tests/test_pytorch.py +327 -0
- haoline/tests/test_report.py +383 -0
- haoline/tests/test_risks.py +398 -0
- haoline/tests/test_schema.py +417 -0
- haoline/tests/test_tensorflow.py +380 -0
- haoline/tests/test_visualizations.py +316 -0
- haoline/universal_ir.py +856 -0
- haoline/visualizations.py +1086 -0
- haoline/visualize_yolo.py +44 -0
- haoline/web.py +110 -0
- haoline-0.3.0.dist-info/METADATA +471 -0
- haoline-0.3.0.dist-info/RECORD +70 -0
- haoline-0.3.0.dist-info/WHEEL +4 -0
- haoline-0.3.0.dist-info/entry_points.txt +5 -0
- haoline-0.3.0.dist-info/licenses/LICENSE +22 -0
haoline/tests/test_llm_patterns.py (new file)
@@ -0,0 +1,540 @@
# Copyright (c) 2025 HaoLine Contributors
# SPDX-License-Identifier: MIT

"""
Tests for LLM-scale pattern detection with real models.

Task 5.4.8: Tests with BERT, GPT-2, LLaMA patterns.

These tests verify that our pattern detection works on real transformer
architectures. Models are downloaded from the ONNX Model Zoo.
"""

from __future__ import annotations

import os
import tempfile
import urllib.request
from pathlib import Path

import numpy as np
import onnx
import pytest
from onnx import TensorProto, helper

from ..analyzer import ONNXGraphLoader
from ..hierarchical_graph import HierarchicalGraphBuilder
from ..patterns import PatternAnalyzer

# URLs for test models from the ONNX Model Zoo
MODEL_URLS = {
    # BERT fine-tuned for SQuAD
    "bert-base": "https://github.com/onnx/models/raw/main/text/machine_comprehension/bert-squad/model/bertsquad-12.onnx",
    # GPT-2 - using a smaller variant
    "gpt2": "https://github.com/onnx/models/raw/main/text/machine_comprehension/gpt-2/model/gpt2-lm-head-10.onnx",
}

# Cache directory for downloaded models
CACHE_DIR = Path(tempfile.gettempdir()) / "haoline_test_models"


def download_model(name: str) -> Path | None:
    """Download a model from the Model Zoo if it is not already cached."""
    if name not in MODEL_URLS:
        return None

    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    model_path = CACHE_DIR / f"{name}.onnx"

    if model_path.exists():
        return model_path

    url = MODEL_URLS[name]
    try:
        print(f"Downloading {name} model from {url}...")
        urllib.request.urlretrieve(url, model_path)
        return model_path
    except Exception as e:
        print(f"Failed to download {name}: {e}")
        # Drop any partial download so a later call does not treat it as cached.
        model_path.unlink(missing_ok=True)
        return None
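
For context, a minimal sketch of how the helper behaves across calls (illustrative only; the printed path depends on the platform's temp directory):

path = download_model("gpt2")   # first call fetches into CACHE_DIR
path = download_model("gpt2")   # second call returns the cached copy
print(path)                     # e.g. /tmp/haoline_test_models/gpt2.onnx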


def create_mini_bert_model() -> onnx.ModelProto:
    """
    Create a minimal BERT-like model for testing pattern detection.

    This captures the key architectural elements:
    - Layer normalization
    - Attention with Q, K, V projections (single-head here, standing in for multi-head)
    - Feed-forward network (expand + activate + contract)
    - Residual connections
    """
    batch = 1
    seq_len = 128
    hidden = 768
    num_heads = 12
    head_dim = hidden // num_heads
    ff_dim = 3072

    # Inputs
    X = helper.make_tensor_value_info("input", TensorProto.FLOAT, [batch, seq_len, hidden])
    Y = helper.make_tensor_value_info("output", TensorProto.FLOAT, [batch, seq_len, hidden])

    # Layer norm weights
    ln1_gamma = helper.make_tensor(
        "ln1_gamma",
        TensorProto.FLOAT,
        [hidden],
        np.ones(hidden, dtype=np.float32).tolist(),
    )
    ln1_beta = helper.make_tensor(
        "ln1_beta",
        TensorProto.FLOAT,
        [hidden],
        np.zeros(hidden, dtype=np.float32).tolist(),
    )
    ln2_gamma = helper.make_tensor(
        "ln2_gamma",
        TensorProto.FLOAT,
        [hidden],
        np.ones(hidden, dtype=np.float32).tolist(),
    )
    ln2_beta = helper.make_tensor(
        "ln2_beta",
        TensorProto.FLOAT,
        [hidden],
        np.zeros(hidden, dtype=np.float32).tolist(),
    )

    # Attention weights (Q, K, V, O projections)
    wq = helper.make_tensor(
        "wq",
        TensorProto.FLOAT,
        [hidden, hidden],
        np.random.randn(hidden * hidden).astype(np.float32).tolist(),
    )
    wk = helper.make_tensor(
        "wk",
        TensorProto.FLOAT,
        [hidden, hidden],
        np.random.randn(hidden * hidden).astype(np.float32).tolist(),
    )
    wv = helper.make_tensor(
        "wv",
        TensorProto.FLOAT,
        [hidden, hidden],
        np.random.randn(hidden * hidden).astype(np.float32).tolist(),
    )
    wo = helper.make_tensor(
        "wo",
        TensorProto.FLOAT,
        [hidden, hidden],
        np.random.randn(hidden * hidden).astype(np.float32).tolist(),
    )

    # FFN weights
    w1 = helper.make_tensor(
        "w1",
        TensorProto.FLOAT,
        [hidden, ff_dim],
        np.random.randn(hidden * ff_dim).astype(np.float32).tolist(),
    )
    w2 = helper.make_tensor(
        "w2",
        TensorProto.FLOAT,
        [ff_dim, hidden],
        np.random.randn(ff_dim * hidden).astype(np.float32).tolist(),
    )

    # Scale factor for attention
    scale_val = 1.0 / np.sqrt(head_dim)
    scale = helper.make_tensor("scale", TensorProto.FLOAT, [], [scale_val])

    nodes = [
        # Pre-attention layer norm
        helper.make_node(
            "LayerNormalization",
            ["input", "ln1_gamma", "ln1_beta"],
            ["ln1_out"],
            name="ln1",
            epsilon=1e-5,
            axis=-1,
        ),
        # Q, K, V projections
        helper.make_node("MatMul", ["ln1_out", "wq"], ["q"], name="q_proj"),
        helper.make_node("MatMul", ["ln1_out", "wk"], ["k"], name="k_proj"),
        helper.make_node("MatMul", ["ln1_out", "wv"], ["v"], name="v_proj"),
        # Transpose K for attention
        helper.make_node("Transpose", ["k"], ["k_t"], name="k_transpose", perm=[0, 2, 1]),
        # Attention scores: Q @ K^T
        helper.make_node("MatMul", ["q", "k_t"], ["attn_scores"], name="attn_matmul"),
        # Scale attention scores
        helper.make_node("Mul", ["attn_scores", "scale"], ["attn_scaled"], name="attn_scale"),
        # Softmax
        helper.make_node("Softmax", ["attn_scaled"], ["attn_weights"], name="softmax", axis=-1),
        # Attention output: softmax(QK^T / sqrt(d)) @ V
        helper.make_node("MatMul", ["attn_weights", "v"], ["attn_out"], name="attn_v_matmul"),
        # Output projection
        helper.make_node("MatMul", ["attn_out", "wo"], ["attn_proj"], name="o_proj"),
        # Residual connection 1
        helper.make_node("Add", ["input", "attn_proj"], ["res1"], name="residual1"),
        # Pre-FFN layer norm
        helper.make_node(
            "LayerNormalization",
            ["res1", "ln2_gamma", "ln2_beta"],
            ["ln2_out"],
            name="ln2",
            epsilon=1e-5,
            axis=-1,
        ),
        # FFN: up projection
        helper.make_node("MatMul", ["ln2_out", "w1"], ["ff_up"], name="ffn_up"),
        # FFN: GELU activation (standard ONNX Gelu op, available since opset 20)
        helper.make_node("Gelu", ["ff_up"], ["ff_act"], name="ffn_gelu"),
        # FFN: down projection
        helper.make_node("MatMul", ["ff_act", "w2"], ["ff_down"], name="ffn_down"),
        # Residual connection 2
        helper.make_node("Add", ["res1", "ff_down"], ["output"], name="residual2"),
    ]

    graph = helper.make_graph(
        nodes,
        "mini_bert",
        [X],
        [Y],
        [ln1_gamma, ln1_beta, ln2_gamma, ln2_beta, wq, wk, wv, wo, w1, w2, scale],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 20)])
    return model
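
A quick structural sanity check on this builder (a sketch, not part of the packaged tests; onnx.checker and onnx.shape_inference are standard onnx APIs, assuming an onnx release with opset-20 Gelu support). Note the mini-GPT graph below would not pass this check, since Silu is not a registered ai.onnx op; the pattern tests only inspect op_type strings.

from onnx import checker, shape_inference

model = create_mini_bert_model()
checker.check_model(model)                      # validates ops, attributes, wiring
inferred = shape_inference.infer_shapes(model)  # propagates tensor shapes
print(len(model.graph.node), "ops")             # 16 ops in the single block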


def create_mini_gpt_model() -> onnx.ModelProto:
    """
    Create a minimal GPT-like model (decoder-only transformer).

    Key differences from BERT:
    - Decoder-only, no encoder
    - Combined QKV projection
    - Uses pre-norm (LN before attention)

    The causal attention mask is omitted for simplicity; the pattern
    tests only look at graph structure.
    """
    batch = 1
    seq_len = 64
    hidden = 512
    ff_dim = 2048

    X = helper.make_tensor_value_info("input", TensorProto.FLOAT, [batch, seq_len, hidden])
    Y = helper.make_tensor_value_info("output", TensorProto.FLOAT, [batch, seq_len, hidden])

    # Weights (smaller than BERT for testing)
    ln_gamma = helper.make_tensor(
        "ln_gamma",
        TensorProto.FLOAT,
        [hidden],
        np.ones(hidden, dtype=np.float32).tolist(),
    )
    ln_beta = helper.make_tensor(
        "ln_beta",
        TensorProto.FLOAT,
        [hidden],
        np.zeros(hidden, dtype=np.float32).tolist(),
    )

    # Combined QKV projection (common in GPT implementations)
    wqkv = helper.make_tensor(
        "wqkv",
        TensorProto.FLOAT,
        [hidden, hidden * 3],
        np.random.randn(hidden * hidden * 3).astype(np.float32).tolist(),
    )
    wo = helper.make_tensor(
        "wo",
        TensorProto.FLOAT,
        [hidden, hidden],
        np.random.randn(hidden * hidden).astype(np.float32).tolist(),
    )
    w1 = helper.make_tensor(
        "w1",
        TensorProto.FLOAT,
        [hidden, ff_dim],
        np.random.randn(hidden * ff_dim).astype(np.float32).tolist(),
    )
    w2 = helper.make_tensor(
        "w2",
        TensorProto.FLOAT,
        [ff_dim, hidden],
        np.random.randn(ff_dim * hidden).astype(np.float32).tolist(),
    )

    nodes = [
        # Pre-norm
        helper.make_node(
            "LayerNormalization",
            ["input", "ln_gamma", "ln_beta"],
            ["ln_out"],
            name="pre_ln",
            epsilon=1e-5,
            axis=-1,
        ),
        # Combined QKV projection (GPT style)
        helper.make_node("MatMul", ["ln_out", "wqkv"], ["qkv"], name="qkv_proj"),
        # Split into Q, K, V
        helper.make_node(
            "Split", ["qkv"], ["q", "k", "v"], name="qkv_split", axis=-1, num_outputs=3
        ),
        # Transpose K
        helper.make_node("Transpose", ["k"], ["k_t"], name="k_transpose", perm=[0, 2, 1]),
        # Attention (unscaled and unmasked -- enough for pattern detection)
        helper.make_node("MatMul", ["q", "k_t"], ["attn_scores"], name="attn_qk"),
        helper.make_node("Softmax", ["attn_scores"], ["attn_weights"], name="softmax", axis=-1),
        helper.make_node("MatMul", ["attn_weights", "v"], ["attn_out"], name="attn_v"),
        # Output projection
        helper.make_node("MatMul", ["attn_out", "wo"], ["attn_proj"], name="out_proj"),
        # Residual
        helper.make_node("Add", ["input", "attn_proj"], ["res1"], name="residual1"),
        # FFN with a SiLU activation (stand-in for LLaMA-style SwiGLU; note
        # Silu is not a standard ai.onnx op, but these tests never run the model)
        helper.make_node("MatMul", ["res1", "w1"], ["ff_up"], name="ffn_up"),
        helper.make_node("Silu", ["ff_up"], ["ff_act"], name="ffn_silu"),
        helper.make_node("MatMul", ["ff_act", "w2"], ["ff_down"], name="ffn_down"),
        # Final residual
        helper.make_node("Add", ["res1", "ff_down"], ["output"], name="residual2"),
    ]

    graph = helper.make_graph(nodes, "mini_gpt", [X], [Y], [ln_gamma, ln_beta, wqkv, wo, w1, w2])
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 20)])
    return model
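
As a rough size check (arithmetic only, using the dimensions defined above), the mini-GPT carries about 3.1M parameters:

hidden, ff_dim = 512, 2048
params = hidden * (3 * hidden)   # wqkv: combined QKV projection
params += hidden * hidden        # wo: output projection
params += 2 * hidden * ff_dim    # w1 + w2: FFN up/down projections
params += 2 * hidden             # ln_gamma + ln_beta
print(f"{params:,}")             # 3,146,752 (~3.1M)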


class TestMiniBERT:
    """Test pattern detection on mini-BERT architecture."""

    def test_attention_pattern_detection(self):
        """Test that attention patterns are detected in BERT-like model."""
        model = create_mini_bert_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            analyzer = PatternAnalyzer()
            blocks = analyzer.group_into_blocks(graph_info)
            arch_type = analyzer.classify_architecture(graph_info, blocks)

            # Should detect transformer patterns
            block_types = {b.block_type for b in blocks}

            # Should have attention-related blocks
            assert any("Attention" in bt or "MatMul" in bt for bt in block_types), (
                f"Expected attention patterns, got: {block_types}"
            )

            # Architecture should be transformer
            assert arch_type == "transformer", f"Expected transformer, got {arch_type}"

        finally:
            model_path.unlink()

    def test_ffn_pattern_detection(self):
        """Test that FFN/MLP patterns are detected."""
        model = create_mini_bert_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            analyzer = PatternAnalyzer()
            blocks = analyzer.group_into_blocks(graph_info)

            # Check that blocks were detected (any type).
            # The specific types depend on the pattern analyzer's configuration.
            assert len(blocks) >= 1, f"Expected at least one block, got {len(blocks)}"

            # Verify we have MatMul ops in the graph (FFN uses MatMul)
            matmul_ops = [n for n in graph_info.nodes if n.op_type == "MatMul"]
            assert len(matmul_ops) >= 4, (
                f"Expected multiple MatMul ops (Q,K,V,O projections + FFN), got {len(matmul_ops)}"
            )

        finally:
            model_path.unlink()

    def test_residual_connections_detected(self):
        """Test that residual connections are detected."""
        model = create_mini_bert_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            analyzer = PatternAnalyzer()
            blocks = analyzer.group_into_blocks(graph_info)

            # Check for residual patterns
            residual_blocks = [b for b in blocks if "Residual" in b.block_type]

            # BERT has 2 residual connections per layer
            assert len(residual_blocks) >= 1, (
                f"Expected residual connections, got {len(residual_blocks)}"
            )

        finally:
            model_path.unlink()
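
Each test above repeats the same save-to-temp / load / unlink dance; a hypothetical helper (not in the package) that would factor it out:

import contextlib

@contextlib.contextmanager
def saved_model(model: onnx.ModelProto):
    # Serialize the proto to a temporary .onnx file, yield its path, clean up.
    with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
        onnx.save(model, f.name)
    path = Path(f.name)
    try:
        yield path
    finally:
        path.unlink()

# Usage: with saved_model(create_mini_bert_model()) as model_path: ...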


class TestMiniGPT:
    """Test pattern detection on mini-GPT architecture."""

    def test_gpt_architecture_detection(self):
        """Test GPT-style decoder architecture detection."""
        model = create_mini_gpt_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            analyzer = PatternAnalyzer()
            blocks = analyzer.group_into_blocks(graph_info)
            arch_type = analyzer.classify_architecture(graph_info, blocks)

            # Should detect transformer
            assert arch_type == "transformer", f"Expected transformer, got {arch_type}"

        finally:
            model_path.unlink()

    def test_swiglu_detection(self):
        """Test detection of the SiLU activation (the gate in LLaMA-style SwiGLU)."""
        model = create_mini_gpt_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            # Check that Silu (a component of SwiGLU) is present
            op_types = {n.op_type for n in graph_info.nodes}
            assert "Silu" in op_types, f"Expected Silu activation, got: {op_types}"

        finally:
            model_path.unlink()


class TestHierarchicalGraphWithTransformers:
    """Test hierarchical graph building with transformer models."""

    def test_bert_hierarchy(self):
        """Test hierarchical graph construction for BERT."""
        model = create_mini_bert_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            analyzer = PatternAnalyzer()
            blocks = analyzer.group_into_blocks(graph_info)

            builder = HierarchicalGraphBuilder()
            hier_graph = builder.build(graph_info, blocks, "MiniBERT")

            assert hier_graph.root is not None
            assert hier_graph.root.name == "MiniBERT"
            assert hier_graph.total_nodes > 0

            # Should have blocks as children
            assert len(hier_graph.root.children) > 0

        finally:
            model_path.unlink()

    def test_layer_hierarchy(self):
        """Test the layer hierarchy (Model -> Layers -> Blocks -> Ops)."""
        model = create_mini_bert_model()

        with tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as f:
            onnx.save(model, f.name)
            model_path = Path(f.name)

        try:
            loader = ONNXGraphLoader()
            _, graph_info = loader.load(model_path)

            analyzer = PatternAnalyzer()
            blocks = analyzer.group_into_blocks(graph_info)

            builder = HierarchicalGraphBuilder()
            hier_graph = builder.build_layer_hierarchy(graph_info, blocks, "MiniBERT")

            assert hier_graph.root is not None
            assert hier_graph.depth >= 1

        finally:
            model_path.unlink()
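
To see what the builders produce, a small traversal sketch, assuming child nodes expose the same name/children attributes the assertions above read off the root:

def walk(node, indent: int = 0) -> None:
    # Depth-first dump of the hierarchy: Model -> Layers -> Blocks -> Ops.
    print("  " * indent + node.name)
    for child in node.children:
        walk(child, indent + 1)

# walk(hier_graph.root)  # prints MiniBERT, then its blocks, then their ops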


@pytest.mark.skipif(
    os.environ.get("SKIP_DOWNLOAD_TESTS", "1") == "1",
    reason="Skipping download tests (set SKIP_DOWNLOAD_TESTS=0 to enable)",
)
class TestRealModelZoo:
    """Tests with real models from ONNX Model Zoo.

    These tests download actual models and are disabled by default.
    Set SKIP_DOWNLOAD_TESTS=0 to run them.
    """

    def test_bert_squad(self):
        """Test with real BERT model from Model Zoo."""
        model_path = download_model("bert-base")
        if model_path is None:
            pytest.skip("Could not download BERT model")

        loader = ONNXGraphLoader()
        _, graph_info = loader.load(model_path)

        analyzer = PatternAnalyzer()
        blocks = analyzer.group_into_blocks(graph_info)
        arch_type = analyzer.classify_architecture(graph_info, blocks)

        assert arch_type == "transformer"
        assert len(blocks) > 10  # BERT has many blocks

    def test_gpt2(self):
        """Test with real GPT-2 model from Model Zoo."""
        model_path = download_model("gpt2")
        if model_path is None:
            pytest.skip("Could not download GPT-2 model")

        loader = ONNXGraphLoader()
        _, graph_info = loader.load(model_path)

        analyzer = PatternAnalyzer()
        blocks = analyzer.group_into_blocks(graph_info)
        arch_type = analyzer.classify_architecture(graph_info, blocks)

        assert arch_type == "transformer"


if __name__ == "__main__":
    pytest.main([__file__, "-v"])
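
The Model Zoo tests stay skipped until the environment variable guarding the class is cleared, e.g. SKIP_DOWNLOAD_TESTS=0 pytest -v on this file. An equivalent in-process driver (a sketch; the test path is an assumption about the installed layout):

import os
import pytest

os.environ["SKIP_DOWNLOAD_TESTS"] = "0"  # must be set before test collection
raise SystemExit(pytest.main(["haoline/tests/test_llm_patterns.py", "-v"]))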