coreml-complexity-analyzer 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- coreml_complexity_analyzer/__init__.py +29 -0
- coreml_complexity_analyzer/compute_cost_report.py +214 -0
- coreml_complexity_analyzer/flops_analyzer.py +550 -0
- coreml_complexity_analyzer/layer_profiler.py +408 -0
- coreml_complexity_analyzer/memory_estimator.py +365 -0
- coreml_complexity_analyzer-0.1.0.dist-info/METADATA +200 -0
- coreml_complexity_analyzer-0.1.0.dist-info/RECORD +10 -0
- coreml_complexity_analyzer-0.1.0.dist-info/WHEEL +5 -0
- coreml_complexity_analyzer-0.1.0.dist-info/licenses/LICENSE +28 -0
- coreml_complexity_analyzer-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
# Copyright (c) 2025, Devavarapu Yashwanth, Ireddi Rakshitha. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by a BSD-3-clause license that can be
|
|
4
|
+
# found in the LICENSE file.
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
FLOPS Analyzer for Core ML Models.
|
|
8
|
+
|
|
9
|
+
Provides floating-point operations (FLOPS) estimation for MIL operations
|
|
10
|
+
in Core ML models including convolutions, linear layers, matrix
|
|
11
|
+
multiplications, and element-wise operations.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Dict, List, Optional, Tuple, Any
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
import coremltools as ct
|
|
19
|
+
from coremltools.models import MLModel
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class LayerFLOPS:
    """Cost record for one layer/operation of an analyzed model.

    Attributes
    ----------
    name : str
        Identifier of the layer or operation.
    op_type : str
        Operation/layer type string (for example ``"conv"``).
    flops : int
        Estimated floating-point operation count.
    mac_ops : int
        Estimated multiply-accumulate count.
    input_shapes : List[Tuple[int, ...]]
        Shapes of the tensor inputs that could be determined.
    output_shapes : List[Tuple[int, ...]]
        Shapes of the tensor outputs that could be determined.
    attributes : Dict[str, Any]
        Extra per-operation attributes; a fresh empty dict per instance
        when not supplied.
    """

    name: str
    op_type: str
    flops: int
    mac_ops: int
    input_shapes: List[Tuple[int, ...]]
    output_shapes: List[Tuple[int, ...]]
    attributes: Dict[str, Any] = field(default_factory=dict)

    @property
    def gflops(self) -> float:
        """FLOPS scaled to billions (``flops / 1e9``)."""
        return self.flops / 1e9

    @property
    def mflops(self) -> float:
        """FLOPS scaled to millions (``flops / 1e6``)."""
        return self.flops / 1e6
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class FLOPSAnalyzer:
    """
    Analyzer for estimating FLOPS of Core ML models.

    Calculates the number of floating-point operations required to execute
    each operation in a Core ML model. This is useful for:

    - Comparing model efficiency before and after optimization
    - Estimating inference latency on different hardware
    - Identifying computational bottlenecks

    Parameters
    ----------
    model : MLModel or str
        A loaded MLModel instance or path to a .mlmodel/.mlpackage file.

    Notes
    -----
    All figures are static estimates derived from the model specification:

    - Dimensions that are not compile-time constants are counted as 1.
    - Operations inside nested blocks (control flow) are each counted once,
      regardless of how often they would execute at runtime.
    - The NeuralNetwork format stores no per-layer tensor shapes, so those
      layers are costed against a fixed placeholder output size; compare
      such numbers only relative to each other.

    Examples
    --------
    >>> import coremltools as ct
    >>> from coreml_complexity_analyzer import FLOPSAnalyzer
    >>>
    >>> model = ct.models.MLModel("my_model.mlpackage")
    >>> analyzer = FLOPSAnalyzer(model)
    >>> total_flops = analyzer.get_total_flops()
    >>> print(f"Total FLOPS: {total_flops:,}")
    >>>
    >>> breakdown = analyzer.get_layer_breakdown()
    >>> for layer in breakdown[:5]:
    ...     print(f"{layer.name}: {layer.mflops:.2f} MFLOPS")
    """

    # Members of the spec's "Type" oneof that use the NeuralNetwork format.
    _NN_MODEL_TYPES = (
        "neuralNetwork",
        "neuralNetworkClassifier",
        "neuralNetworkRegressor",
    )

    # Placeholder element count for NeuralNetwork layers (no shapes in spec).
    _NN_PLACEHOLDER_ELEMENTS = 1000

    # Pooling window (3x3) assumed when the kernel size cannot be resolved.
    _DEFAULT_POOL_WINDOW = 9

    # Immediate tensors longer than this are not decoded into Python lists;
    # only small parameter tensors (kernel sizes, flags, names) are needed.
    _MAX_DECODED_ELEMENTS = 64

    _ELEMENTWISE_OPS = ("add", "sub", "mul", "real_div")
    _REDUCE_OPS = ("reduce_sum", "reduce_mean", "reduce_max", "reduce_min")
    _NORMALIZATION_OPS = ("batch_norm", "instance_norm", "layer_norm")
    _POOLING_OPS = ("max_pool", "avg_pool")
    _ZERO_COST_OPS = ("squeeze", "expand_dims", "reshape", "transpose", "const")

    # Rough per-element cost of each supported activation.
    _ACTIVATION_COST = {
        "relu": 1,
        "sigmoid": 4,
        "tanh": 4,
        "gelu": 8,
        "silu": 5,
        "softplus": 3,
    }

    def __init__(self, model):
        # A string is treated as a path and loaded; anything else is assumed
        # to already expose ``get_spec()``.
        self._model = MLModel(model) if isinstance(model, str) else model

        self._spec = self._model.get_spec()
        self._layer_flops: "List[LayerFLOPS]" = []
        self._analyzed = False
        # Value name -> type message, used to resolve name-bound op inputs.
        self._value_types: Dict[str, Any] = {}
        # Value name -> decoded small constant (outputs of ``const`` ops).
        self._const_values: Dict[str, Any] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def analyze(self) -> "List[LayerFLOPS]":
        """
        Analyze the model and compute FLOPS for each operation.

        Returns
        -------
        List[LayerFLOPS]
            List of FLOPS information for each layer.

        Raises
        ------
        ValueError
            If the model is neither an ML Program nor a NeuralNetwork
            (plain, classifier or regressor).
        """
        self._layer_flops = []
        # Reset first so a failed re-analysis is not reported as complete.
        self._analyzed = False

        model_type = self._spec.WhichOneof("Type")
        if model_type == "mlProgram":
            self._analyze_ml_program()
        elif model_type in self._NN_MODEL_TYPES:
            self._analyze_neural_network()
        else:
            raise ValueError(f"Unsupported model type: {model_type}")

        self._analyzed = True
        return self._layer_flops

    def get_total_flops(self) -> int:
        """
        Get total FLOPS for the entire model.

        Runs :meth:`analyze` first if it has not been run yet.

        Returns
        -------
        int
            Total FLOPS count.
        """
        if not self._analyzed:
            self.analyze()
        return sum(layer.flops for layer in self._layer_flops)

    def get_total_mac_ops(self) -> int:
        """
        Get total multiply-accumulate operations for the entire model.

        Runs :meth:`analyze` first if it has not been run yet.

        Returns
        -------
        int
            Total MAC operations count.
        """
        if not self._analyzed:
            self.analyze()
        return sum(layer.mac_ops for layer in self._layer_flops)

    def get_layer_breakdown(self) -> "List[LayerFLOPS]":
        """
        Get per-layer FLOPS breakdown sorted by FLOPS descending.

        Runs :meth:`analyze` first if it has not been run yet.

        Returns
        -------
        List[LayerFLOPS]
            Sorted list of LayerFLOPS objects (a new list).
        """
        if not self._analyzed:
            self.analyze()
        return sorted(self._layer_flops, key=lambda x: x.flops, reverse=True)

    def get_flops_by_op_type(self) -> Dict[str, int]:
        """
        Get FLOPS aggregated by operation type.

        Runs :meth:`analyze` first if it has not been run yet.

        Returns
        -------
        Dict[str, int]
            Operation type to total FLOPS mapping, ordered by FLOPS
            descending.
        """
        if not self._analyzed:
            self.analyze()

        totals: Dict[str, int] = {}
        for layer in self._layer_flops:
            totals[layer.op_type] = totals.get(layer.op_type, 0) + layer.flops

        return dict(sorted(totals.items(), key=lambda item: item[1], reverse=True))

    # ------------------------------------------------------------------
    # Model traversal
    # ------------------------------------------------------------------

    def _analyze_ml_program(self):
        """Analyze FLOPS for ML Program format models."""
        program = self._spec.mlProgram

        for func in program.functions.values():
            # Value names are resolved per function.
            self._value_types = {}
            self._const_values = {}
            self._register_value_types(func.inputs)
            for block in func.block_specializations.values():
                self._analyze_block(block)

    def _analyze_block(self, block):
        """Cost every operation of ``block``, descending into nested blocks."""
        self._register_value_types(getattr(block, "inputs", ()))

        for op in block.operations:
            self._register_value_types(op.outputs)
            layer_flops = self._compute_operation_flops(op)

            if op.type == "const":
                # Remember small constants so that ops consuming them by
                # name (e.g. pooling kernel sizes) can read their value.
                constant = layer_flops.attributes.get("val")
                if constant is not None:
                    for out in op.outputs:
                        self._const_values[out.name] = constant

            if layer_flops.flops > 0:
                self._layer_flops.append(layer_flops)

            for nested in getattr(op, "blocks", ()):
                self._analyze_block(nested)

    def _register_value_types(self, named_values):
        """Record the declared type of each named value for later lookup."""
        for named in named_values:
            if named.HasField("type"):
                self._value_types[named.name] = named.type

    def _analyze_neural_network(self):
        """Analyze FLOPS for Neural Network format models."""
        model_type = self._spec.WhichOneof("Type")
        if model_type not in self._NN_MODEL_TYPES:
            return

        nn_spec = getattr(self._spec, model_type)
        for layer in nn_spec.layers:
            layer_flops = self._compute_nn_layer_flops(layer)
            if layer_flops.flops > 0:
                self._layer_flops.append(layer_flops)

    def _compute_operation_flops(self, op) -> "LayerFLOPS":
        """Compute FLOPS for a single MIL operation.

        The record is named after the operation's first output, or
        ``"unknown"`` when the operation declares no outputs.
        """
        op_type = op.type
        op_name = op.outputs[0].name if op.outputs else "unknown"

        input_shapes = self._extract_input_shapes(op)
        output_shapes = self._extract_output_shapes(op)

        flops, mac_ops = self._calculate_flops_for_op_type(
            op_type, op, input_shapes, output_shapes
        )

        return LayerFLOPS(
            name=op_name,
            op_type=op_type,
            flops=flops,
            mac_ops=mac_ops,
            input_shapes=input_shapes,
            output_shapes=output_shapes,
            attributes=self._extract_attributes(op),
        )

    def _compute_nn_layer_flops(self, layer) -> "LayerFLOPS":
        """Compute FLOPS for a Neural Network layer.

        Shape lists are left empty because the layer message carries no
        tensor shapes.
        """
        layer_type = layer.WhichOneof("layer")
        flops, mac_ops = self._calculate_flops_for_nn_layer(layer_type, layer)

        return LayerFLOPS(
            name=layer.name,
            op_type=layer_type,
            flops=flops,
            mac_ops=mac_ops,
            input_shapes=[],
            output_shapes=[],
        )

    # ------------------------------------------------------------------
    # Per-operation cost models (ML Program)
    # ------------------------------------------------------------------

    def _calculate_flops_for_op_type(
        self,
        op_type: str,
        op,
        input_shapes: List[Tuple[int, ...]],
        output_shapes: List[Tuple[int, ...]],
    ) -> Tuple[int, int]:
        """Calculate FLOPS and MAC ops based on operation type.

        Returns
        -------
        Tuple[int, int]
            ``(flops, mac_ops)``. Unrecognised operation types fall back to
            one FLOP per output element.
        """
        if op_type in ("conv", "conv_transpose"):
            return self._conv_flops(op, input_shapes, output_shapes)
        if op_type == "linear":
            return self._linear_flops(op, input_shapes, output_shapes)
        if op_type == "matmul":
            return self._matmul_flops(input_shapes, output_shapes)
        if op_type == "softmax":
            return self._softmax_flops(output_shapes)
        if op_type == "einsum":
            return self._einsum_flops(op, input_shapes)
        if op_type in self._ELEMENTWISE_OPS:
            return self._elementwise_flops(output_shapes)
        if op_type in self._ACTIVATION_COST:
            return self._activation_flops(op_type, output_shapes)
        if op_type in self._REDUCE_OPS:
            return self._reduce_flops(input_shapes, output_shapes)
        if op_type in self._NORMALIZATION_OPS:
            return self._normalization_flops(op_type, output_shapes)
        if op_type in self._POOLING_OPS:
            return self._pooling_flops(op, output_shapes)
        if op_type in self._ZERO_COST_OPS:
            return (0, 0)
        return self._estimate_flops_from_output(output_shapes)

    def _conv_flops(
        self,
        op,
        input_shapes: List[Tuple[int, ...]],
        output_shapes: List[Tuple[int, ...]],
    ) -> Tuple[int, int]:
        """Calculate FLOPS for convolution operations.

        When the shape of the ``weight`` input can be resolved it is used
        directly. Per the MIL op reference the weight is laid out as
        ``[C_out, C_in/groups, *K]`` for ``conv`` and
        ``[C_in, C_out/groups, *K]`` for ``conv_transpose``, so
        ``prod(weight[1:])`` is the number of MACs per output element
        (``conv``) or per input element (``conv_transpose``).

        Otherwise the estimate falls back to
        ``output_elements * kernel * (in_channels // groups)`` with kernel
        and groups taken from op attributes when present (default 1).
        Bias additions are not counted.
        """
        if not output_shapes:
            return (0, 0)

        output_elements = self._num_elements(output_shapes[0])

        weight_shape = self._named_input_shape(op, "weight")
        if weight_shape and len(weight_shape) >= 2:
            macs_per_position = self._num_elements(weight_shape[1:])
            if getattr(op, "type", None) == "conv_transpose" and input_shapes:
                positions = self._num_elements(input_shapes[0])
            else:
                positions = output_elements
            mac_ops = positions * macs_per_position
            return (2 * mac_ops, mac_ops)

        # Fallback heuristic for ops whose weight shape is unavailable.
        kernel_size = 1
        groups = 1
        for attr_name, attr_val in self._get_op_attributes(op).items():
            lowered = attr_name.lower()
            if ("kernel" in lowered or "weight" in lowered) and isinstance(
                attr_val, (list, tuple)
            ):
                kernel_size = int(np.prod(attr_val))
            if "groups" in lowered and isinstance(attr_val, (int, float)):
                groups = int(attr_val) if attr_val else 1

        in_channels = 1
        if input_shapes and len(input_shapes[0]) >= 2:
            in_channels = int(input_shapes[0][1])
            if in_channels <= 0:  # dynamic channel dimension
                in_channels = 1

        mac_ops = output_elements * kernel_size * (in_channels // max(groups, 1))
        return (2 * mac_ops, mac_ops)

    def _linear_flops(
        self,
        op,
        input_shapes: List[Tuple[int, ...]],
        output_shapes: List[Tuple[int, ...]],
    ) -> Tuple[int, int]:
        """Calculate FLOPS for linear/fully-connected operations.

        ``mac_ops = rows * in_features * out_features`` where ``rows`` is
        the product of every leading (non-feature) dimension of the first
        input, ``in_features`` its last dimension and ``out_features`` the
        last dimension of the first output. Bias additions are not counted.
        """
        if not input_shapes or not output_shapes:
            return (0, 0)

        x_shape = input_shapes[0]
        out_shape = output_shapes[0]

        # Empty slices yield 1, so rank-0/rank-1 inputs degrade gracefully.
        rows = self._num_elements(x_shape[:-1])
        in_features = self._num_elements(x_shape[-1:])
        out_features = self._num_elements(out_shape[-1:])

        mac_ops = rows * in_features * out_features
        return (2 * mac_ops, mac_ops)

    def _matmul_flops(
        self,
        input_shapes: List[Tuple[int, ...]],
        output_shapes: List[Tuple[int, ...]],
    ) -> Tuple[int, int]:
        """Calculate FLOPS for matrix multiplication.

        Uses ``batch * m * k * n`` with ``m, k`` from the first operand,
        ``n`` from the second (1 when it is a vector) and ``batch`` from the
        first operand's leading dimensions.

        NOTE(review): transpose flags and batch broadcasting driven by the
        second operand are not taken into account.
        """
        if len(input_shapes) < 2:
            return (0, 0)

        shape_a, shape_b = input_shapes[0], input_shapes[1]
        if not shape_a or not shape_b:
            return (0, 0)

        m = self._num_elements(shape_a[-2:-1]) if len(shape_a) >= 2 else 1
        k = self._num_elements(shape_a[-1:])
        # A rank-1 right operand is a vector: the result has no column dim.
        n = self._num_elements(shape_b[-1:]) if len(shape_b) >= 2 else 1
        batch_size = self._num_elements(shape_a[:-2])

        mac_ops = batch_size * m * n * k
        return (2 * mac_ops, mac_ops)

    def _elementwise_flops(
        self, output_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Calculate FLOPS for element-wise operations (1 per output element)."""
        if not output_shapes:
            return (0, 0)
        return (self._total_elements(output_shapes), 0)

    def _activation_flops(
        self, op_type: str, output_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Calculate FLOPS for activation functions.

        Each output element costs the per-activation multiplier from
        ``_ACTIVATION_COST`` (1 for activations not listed there).
        """
        if not output_shapes:
            return (0, 0)

        multiplier = self._ACTIVATION_COST.get(op_type, 1)
        return (self._total_elements(output_shapes) * multiplier, 0)

    def _softmax_flops(
        self, output_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Calculate FLOPS for softmax operation (5 per output element)."""
        if not output_shapes:
            return (0, 0)
        return (5 * self._total_elements(output_shapes), 0)

    def _reduce_flops(
        self,
        input_shapes: List[Tuple[int, ...]],
        output_shapes: List[Tuple[int, ...]],
    ) -> Tuple[int, int]:
        """Calculate FLOPS for reduction operations.

        One FLOP per element of the reduced tensor (the first input).
        Further inputs such as the ``axes`` tensor are parameters and do not
        contribute.
        """
        if not input_shapes:
            return (0, 0)
        return (self._num_elements(input_shapes[0]), 0)

    def _normalization_flops(
        self, op_type: str, output_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Calculate FLOPS for normalization operations (5 per output element)."""
        if not output_shapes:
            return (0, 0)
        return (5 * self._total_elements(output_shapes), 0)

    def _pooling_flops(
        self, op, output_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Calculate FLOPS for pooling operations.

        Cost is ``output_elements * window`` where ``window`` is resolved,
        in order, from the ``kernel_sizes`` input, from any attribute whose
        name contains ``"kernel"``, and finally from the 3x3 default.
        """
        if not output_shapes:
            return (0, 0)

        kernel_size = self._window_elements(
            self._named_input_value(op, "kernel_sizes")
        )
        if kernel_size is None:
            kernel_size = self._DEFAULT_POOL_WINDOW
            for attr_name, attr_val in self._get_op_attributes(op).items():
                if "kernel" in attr_name.lower():
                    window = self._window_elements(attr_val)
                    if window is not None:
                        kernel_size = window

        return (self._total_elements(output_shapes) * kernel_size, 0)

    def _einsum_flops(
        self, op, input_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Estimate FLOPS for einsum operations.

        Heuristic only: the equation is not parsed. The estimate is the
        square root of the product of all operand sizes, with half of it
        reported as MACs.
        """
        if len(input_shapes) < 2:
            return (0, 0)

        total_elements = 1
        for shape in input_shapes:
            if shape:
                total_elements *= self._num_elements(shape)

        # float() keeps np.sqrt well-defined for products beyond 64 bits.
        estimated_flops = int(np.sqrt(float(total_elements)))
        return (estimated_flops, estimated_flops // 2)

    def _estimate_flops_from_output(
        self, output_shapes: List[Tuple[int, ...]]
    ) -> Tuple[int, int]:
        """Fallback FLOPS estimation based on output size (1 per element)."""
        if not output_shapes:
            return (0, 0)
        return (self._total_elements(output_shapes), 0)

    # ------------------------------------------------------------------
    # Per-layer cost model (NeuralNetwork)
    # ------------------------------------------------------------------

    def _calculate_flops_for_nn_layer(
        self, layer_type: str, layer
    ) -> Tuple[int, int]:
        """Calculate FLOPS for Neural Network layer types.

        NOTE(review): the layer message carries no tensor shapes, so
        convolution uses ``_NN_PLACEHOLDER_ELEMENTS`` spatial positions and
        normalization/activation/softmax layers are charged that same flat
        amount. Layer types not listed here cost nothing.
        """
        placeholder = self._NN_PLACEHOLDER_ELEMENTS

        if layer_type == "convolution":
            conv = layer.convolution
            # An empty kernelSize is treated as 3x3; a single entry as square.
            kernel_h = conv.kernelSize[0] if conv.kernelSize else 3
            kernel_w = conv.kernelSize[1] if len(conv.kernelSize) > 1 else kernel_h

            mac_ops = (
                placeholder
                * conv.outputChannels
                * conv.kernelChannels
                * kernel_h
                * kernel_w
            )
            return (2 * mac_ops, mac_ops)

        if layer_type == "innerProduct":
            ip = layer.innerProduct
            mac_ops = ip.inputChannels * ip.outputChannels
            return (2 * mac_ops, mac_ops)

        if layer_type in ("batchnorm", "instanceNormalization"):
            return (placeholder, 0)

        if layer_type in ("activation", "softmax"):
            return (placeholder, 0)

        return (0, 0)

    # ------------------------------------------------------------------
    # Shape helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _num_elements(shape) -> int:
        """Return the element count of ``shape``.

        Negative entries are the placeholder for dimensions that are not
        compile-time constants and are counted as 1, so a dynamic batch does
        not turn the whole estimate negative. An empty shape yields 1.
        """
        count = 1
        for dim in shape:
            dim = int(dim)
            if dim >= 0:
                count *= dim
        return count

    def _total_elements(self, shapes) -> int:
        """Sum the element counts of all non-empty shapes in ``shapes``."""
        return sum(self._num_elements(shape) for shape in shapes if shape)

    @staticmethod
    def _window_elements(window) -> Optional[int]:
        """Convert a kernel-size value to a window element count.

        A sequence is multiplied out; a bare number is treated as the side
        of a square window. Anything else yields ``None``.
        """
        if isinstance(window, (list, tuple)):
            return int(np.prod(window))
        if isinstance(window, (int, float)) and not isinstance(window, bool):
            return int(window) ** 2
        return None

    def _extract_input_shapes(self, op) -> List[Tuple[int, ...]]:
        """Extract input shapes from an operation.

        Inputs are visited in a deterministic order: the input named ``x``
        first, then the remaining inputs alphabetically, because protobuf
        map iteration order is not guaranteed. Every binding of every input
        is considered (tuple inputs such as einsum ``values`` carry several).
        Inputs whose type is unknown, non-tensor or rank-0 are skipped.
        """
        shapes = []
        ordered_inputs = sorted(
            op.inputs.items(), key=lambda item: (item[0] != "x", item[0])
        )
        for _, argument in ordered_inputs:
            for binding in argument.arguments:
                type_spec = self._binding_type(binding)
                if type_spec is None:
                    continue
                shape = self._type_to_shape(type_spec)
                if shape:
                    shapes.append(shape)
        return shapes

    def _extract_output_shapes(self, op) -> List[Tuple[int, ...]]:
        """Extract output shapes from an operation.

        Outputs without a type, with a non-tensor type, or of rank 0 are
        skipped.
        """
        shapes = []
        for out in op.outputs:
            if not out.HasField("type"):
                continue
            shape = self._type_to_shape(out.type)
            if shape:
                shapes.append(shape)
        return shapes

    def _type_to_shape(self, type_spec) -> Optional[Tuple[int, ...]]:
        """Convert a type specification to a shape tuple.

        Returns ``None`` for non-tensor types. Dimensions that are not
        constants are reported as ``-1``.
        """
        if not type_spec.HasField("tensorType"):
            return None
        return tuple(
            dim.constant.size if dim.HasField("constant") else -1
            for dim in type_spec.tensorType.dimensions
        )

    # ------------------------------------------------------------------
    # Input / attribute resolution
    # ------------------------------------------------------------------

    def _binding_type(self, binding):
        """Return the type message behind an input binding, or ``None``.

        A binding is either an inline ``value`` (which carries its own type)
        or the ``name`` of a previously defined value, which is looked up in
        the table filled during traversal.
        """
        kind = binding.WhichOneof("binding")
        if kind == "value":
            return binding.value.type if binding.value.HasField("type") else None
        if kind == "name":
            return self._value_types.get(binding.name)
        return None

    @staticmethod
    def _input_bindings(op, input_name: str):
        """Return the bindings of the input called ``input_name`` (or ``()``)."""
        inputs = getattr(op, "inputs", None)
        # Membership is tested first: indexing a protobuf message map with a
        # missing key would insert an empty entry.
        if not inputs or input_name not in inputs:
            return ()
        return inputs[input_name].arguments

    def _named_input_shape(self, op, input_name: str) -> Optional[Tuple[int, ...]]:
        """Return the shape of the named input, or ``None`` if unresolved."""
        for binding in self._input_bindings(op, input_name):
            type_spec = self._binding_type(binding)
            if type_spec is not None:
                return self._type_to_shape(type_spec)
        return None

    def _named_input_value(self, op, input_name: str):
        """Return the constant value of the named input, or ``None``.

        Inline values are decoded directly; name bindings resolve only when
        they refer to a small constant recorded during traversal.
        """
        for binding in self._input_bindings(op, input_name):
            kind = binding.WhichOneof("binding")
            if kind == "value":
                return self._decode_value(binding.value)
            if kind == "name":
                return self._const_values.get(binding.name)
        return None

    def _decode_value(self, value):
        """Decode a MIL ``Value`` message into a plain Python object.

        Only immediate tensor values are decoded: a rank-0 tensor becomes a
        scalar, anything else a list. Blob-file backed values, tuples, lists,
        dictionaries and tensors longer than ``_MAX_DECODED_ELEMENTS`` yield
        ``None``.
        """
        if value.WhichOneof("value") != "immediateValue":
            return None
        immediate = value.immediateValue
        if immediate.WhichOneof("value") != "tensor":
            return None

        tensor = immediate.tensor
        kind = tensor.WhichOneof("value")
        if kind is None:
            return None

        payload = getattr(tensor, kind).values
        if isinstance(payload, (bytes, str)):
            return payload
        if len(payload) > self._MAX_DECODED_ELEMENTS:
            return None

        items = list(payload)
        # A single element is a scalar only when the declared type is rank 0.
        if len(items) == 1 and not self._type_to_shape(value.type):
            return items[0]
        return items

    def _extract_attributes(self, op) -> Dict[str, Any]:
        """Extract attributes from an operation.

        Returns a dict of the attributes that could be decoded (see
        ``_decode_value``); attributes that cannot be decoded are omitted.
        """
        attributes = getattr(op, "attributes", None)
        if not attributes:
            return {}

        attrs = {}
        for attr_name, attr_val in attributes.items():
            decoded = self._decode_value(attr_val)
            if decoded is not None:
                attrs[attr_name] = decoded
        return attrs

    def _get_op_attributes(self, op) -> Dict[str, Any]:
        """Get all attributes from an operation."""
        return self._extract_attributes(op)
|