mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +102 -104
  3. model_compression_toolkit/__init__.py +2 -2
  4. model_compression_toolkit/core/common/framework_info.py +1 -3
  5. model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
  6. model_compression_toolkit/core/common/graph/base_graph.py +20 -21
  7. model_compression_toolkit/core/common/graph/base_node.py +44 -17
  8. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
  9. model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +187 -0
  10. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
  11. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +35 -162
  12. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
  13. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +668 -0
  14. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
  15. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +74 -51
  16. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
  17. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
  18. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
  19. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
  20. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
  21. model_compression_toolkit/core/common/pruning/pruner.py +5 -3
  22. model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
  23. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
  24. model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
  25. model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
  26. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
  27. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
  28. model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
  29. model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
  30. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
  31. model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
  32. model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
  33. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
  34. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
  35. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
  36. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
  37. model_compression_toolkit/core/graph_prep_runner.py +12 -11
  38. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  39. model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
  40. model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
  41. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
  42. model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
  43. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
  44. model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
  45. model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
  46. model_compression_toolkit/core/runner.py +33 -60
  47. model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
  48. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
  49. model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
  50. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
  51. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
  52. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  53. model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
  54. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
  55. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
  56. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  57. model_compression_toolkit/metadata.py +11 -10
  58. model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
  59. model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
  60. model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
  61. model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
  62. model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
  63. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
  64. model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  65. model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
  66. model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
  67. model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
  68. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  69. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
  70. model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
  71. model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
  72. model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
  73. model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
  74. model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
  75. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
  76. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
  77. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
  78. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
  79. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
  80. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
  81. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
  82. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
  83. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
  84. model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
  85. model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
  86. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
  87. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
  88. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
  89. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
  90. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
  91. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
  92. model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
  93. model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
  94. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
  95. model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
  96. model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
  97. model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
  98. model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
  99. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
  100. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
  101. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -528
  102. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
  103. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
  104. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
  105. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
  106. /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
@@ -0,0 +1,668 @@
1
+ # Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ from collections import defaultdict
16
+ from copy import deepcopy
17
+ from enum import Enum, auto
18
+ from functools import lru_cache
19
+ from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence
20
+
21
+ from model_compression_toolkit.constants import FLOAT_BITWIDTH
22
+ from model_compression_toolkit.core import FrameworkInfo
23
+ from model_compression_toolkit.core.common import Graph, BaseNode
24
+ from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
25
+ from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
26
+ from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut
27
+ from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
28
+ from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
29
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
30
+ RUTarget, ResourceUtilization
31
+ from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
32
+ NodeActivationQuantizationConfig
33
+
34
+
35
class BitwidthMode(Enum):
    """
    Bit-width configuration for resource utilization computation.

    Float: original un-quantized configuration. Assumed to be 32-bit float.
    Q8Bit: NOTE(review): this member was not described in the original documentation. Presumably a fixed
     8-bit configuration for quantized targets - confirm against the bit-width resolution helpers.
    QMaxBit: maximal bit-width configurations. Assigns each node its maximal available precision according to the
     target platform capabilities.
    QMinBit: minimal bit-width configuration. Assigns each node its minimal available precision according to the
     target platform capabilities.
    QCustom: explicitly provided bit-width configuration.
    QDefaultSP: default single-precision bit-width configuration. Can be used either in a single-precision mode,
     or along with TargetInclusionCriterion.QNonConfigurable, which computes the resource utilization only for
     single-precision nodes. To compute custom single precision configuration, use QCustom.
    """
    # Member order is significant: auto() assigns consecutive values in declaration order.
    Float = auto()
    Q8Bit = auto()
    QMaxBit = auto()
    QMinBit = auto()
    QCustom = auto()
    QDefaultSP = auto()
55
+
56
+
57
class TargetInclusionCriterion(Enum):
    """
    Selects which nodes / parameters take part in a resource utilization computation.

    QConfigurable: targets that are configurable for Mixed Precision (multiple quantization candidates).
    QNonConfigurable: targets that are not configurable (single quantization candidate).
    AnyQuantized: every quantized target, configurable or not.
    Any: every target, quantized and float alike.
    """
    # Values are assigned by auto() in declaration order.
    QConfigurable = auto()
    QNonConfigurable = auto()
    AnyQuantized = auto()
    Any = auto()
70
+
71
+
72
class Utilization(NamedTuple):
    """
    Result container for a single resource utilization measurement.
    Can be fed directly to the built-in sum, max and min over an iterable of Utilization objects.

    Args:
        size: parameters or activation tensor(s) size.
        bytes: memory utilization.
    """
    size: int
    bytes: Optional[float]

    def __add__(self, other: 'Utilization') -> 'Utilization':
        # Field-wise addition of two results.
        total_size = self.size + other.size
        total_bytes = self.bytes + other.bytes
        return Utilization(total_size, total_bytes)

    def __radd__(self, other: Union['Utilization', Literal[0]]):
        # sum() starts accumulating from the integer 0, so "0 + Utilization" must give back the object itself.
        if other != 0:
            return self + other     # pragma: no cover
        return self

    def __gt__(self, other: 'Utilization'):
        # Used by max. Only the bytes field takes part in the comparison.
        return self.bytes > other.bytes

    def __lt__(self, other: 'Utilization'):
        # Used by min. Only the bytes field takes part in the comparison.
        return self.bytes < other.bytes     # pragma: no cover
100
+
101
+
102
+ class ResourceUtilizationCalculator:
103
+ """ Resource utilization calculator. """
104
+
105
+ _bitwidth_mode_fn = {
106
+ BitwidthMode.QMaxBit: max,
107
+ BitwidthMode.QMinBit: min,
108
+ }
109
+
110
def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo):
    """
    Store the graph and framework objects and pre-compute per-node size caches.

    Args:
        graph: graph to compute resource utilization for.
        fw_impl: framework implementation object.
        fw_info: framework info object.
    """
    self.graph = graph
    self.fw_impl = fw_impl
    self.fw_info = fw_info

    # The caches are filled for the whole graph up front, even if utilization is later requested only for
    # some of the nodes. Filling them lazily is possible, but the saving is probably negligible.
    act_sizes = {}
    params_cnt = {}
    for node in graph.nodes:
        # total output size reported by the node
        act_sizes[node] = node.get_total_output_params()
        # number of elements of each weight, keyed by the weight's key in node.weights
        params_cnt[node] = {name: weight.size for name, weight in node.weights.items()}
    self._act_tensors_size = act_sizes
    self._params_cnt = params_cnt

    # Graph cuts are computed on first access of the 'cuts' property.
    self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None
123
+
124
@property
def cuts(self) -> Dict[Cut, List[BaseNode]]:
    """
    Graph cuts mapped to the nodes of their memory elements. Computed on first access and cached.

    Returns:
        A dict from each cut that has memory elements to the list of graph nodes of these elements.

    Raises:
        RuntimeError: if the max cut computation returned no cuts.
    """
    if self._cuts is not None:
        return self._cuts

    # The memory graph is built from a deep copy of the graph.
    mem_graph = MemoryGraph(deepcopy(self.graph))
    _, _, found_cuts = compute_graph_max_cut(mem_graph)
    if found_cuts is None:  # pragma: no cover
        raise RuntimeError("Failed to calculate activation memory cuts for graph.")  # pragma: no cover

    # Cuts without memory elements are dropped. The nodes are cached for future use, so they are not
    # filtered by target here.
    nodes_per_cut = {}
    for cut in found_cuts:
        if not cut.mem_elements.elements:
            continue
        nodes_per_cut[cut] = [self.graph.find_node_by_name(elem.node_name)[0]
                              for elem in cut.mem_elements.elements]
    self._cuts = nodes_per_cut
    return self._cuts
137
+
138
def compute_resource_utilization(self,
                                 target_criterion: TargetInclusionCriterion,
                                 bitwidth_mode: BitwidthMode,
                                 act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                                 w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None,
                                 ru_targets: Optional[Iterable[RUTarget]] = None) -> ResourceUtilization:
    """
    Compute network's resource utilization.

    Args:
        target_criterion: criterion to include targets for computation (applies to weights, activation).
        bitwidth_mode: bit-width mode for computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.
        w_qcs: custom weights quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable weights. For non-configurable
          weights, if not provided, the default configuration will be extracted from the node.
        ru_targets: metrics to include for computation. If None (or empty), all metrics are calculated.

    Returns:
        Resource utilization object.

    Raises:
        ValueError: if a custom weights / activations configuration is passed but none of the requested
          metrics makes use of it.
    """
    ru_targets = set(ru_targets) if ru_targets else set(RUTarget)

    needs_weights = bool(ru_targets & {RUTarget.WEIGHTS, RUTarget.TOTAL})
    needs_activations = bool(ru_targets & {RUTarget.ACTIVATION, RUTarget.TOTAL})
    needs_bops = RUTarget.BOPS in ru_targets

    # BOPS computation consumes both the weights and the activations configuration, so a custom
    # configuration is relevant also when BOPS is the only requested metric.
    if w_qcs is not None and not (needs_weights or needs_bops):  # pragma: no cover
        raise ValueError('Weight configuration passed but no relevant metric requested.')
    if act_qcs is not None and not (needs_activations or needs_bops):  # pragma: no cover
        raise ValueError('Activation configuration passed but no relevant metric requested.')

    w_total, a_total = None, None
    if needs_weights:
        w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs)
    if needs_activations:
        a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)

    ru = ResourceUtilization()
    if RUTarget.WEIGHTS in ru_targets:
        ru.weights_memory = w_total
    if RUTarget.ACTIVATION in ru_targets:
        ru.activation_memory = a_total
    if RUTarget.TOTAL in ru_targets:
        # both totals are guaranteed to be computed when TOTAL is requested
        ru.total_memory = w_total + a_total
    if needs_bops:
        ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
                                       bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)

    # Internal sanity check that exactly the requested metrics were filled.
    assert ru.get_restricted_metrics() == set(ru_targets), 'Mismatch between the number of requested and computed metrics'
    return ru
187
+
188
def compute_weights_utilization(self,
                                target_criterion: TargetInclusionCriterion,
                                bitwidth_mode: BitwidthMode,
                                w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None) \
        -> Tuple[float, Dict[BaseNode, Utilization], Dict[BaseNode, Dict[str, Utilization]]]:
    """
    Compute the weights resource utilization of the graph.

    Args:
        target_criterion: criterion to include targets for computation.
        bitwidth_mode: bit-width mode for computation.
        w_qcs: custom weights quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable weights. For non-configurable
          weights, if not provided, the default configuration will be extracted from the node.

    Returns:
        - Total weights utilization of the network.
        - Per node total weights utilization. Dict keys are nodes in a topological order.
        - Detailed per node per weight attribute utilization. Dict keys are nodes in a topological order.
    """
    # Reused nodes are excluded from the weights computation.
    target_nodes = self._get_target_weight_nodes(target_criterion, include_reused=False)
    if not target_nodes:
        return 0, {}, {}

    per_node: Dict[BaseNode, Utilization] = {}
    per_node_per_weight = {}
    for node in self._topo_sort(target_nodes):
        node_qc = w_qcs.get(node) if w_qcs else None
        per_node[node], per_node_per_weight[node] = self.compute_node_weights_utilization(node,
                                                                                          target_criterion,
                                                                                          bitwidth_mode,
                                                                                          node_qc)

    # Only the bytes of the summed utilization are reported as the network total.
    network_util = sum(per_node.values())
    return network_util.bytes, per_node, per_node_per_weight
224
+
225
def compute_node_weights_utilization(self,
                                     n: BaseNode,
                                     target_criterion: TargetInclusionCriterion,
                                     bitwidth_mode: BitwidthMode,
                                     qc: NodeWeightsQuantizationConfig)\
        -> Tuple[Utilization, Dict[str, Utilization]]:
    """
    Compute the weights resource utilization of a single node.

    Args:
        n: node.
        target_criterion: criterion to include weights for computation.
        bitwidth_mode: bit-width mode for the computation.
        qc: custom weights quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable weights. For non-configurable
          weights, if not provided, the default configuration will be extracted from the node.

    Returns:
        - Node's total weights utilization.
        - Detailed per weight attribute utilization.
    """
    target_attrs = self._get_target_weight_attrs(n, target_criterion)
    if not target_attrs:  # pragma: no cover
        return Utilization(0, 0), {}

    def _attr_utilization(attr) -> Utilization:
        # cached element count of the attribute times its bit-width, converted from bits to bytes
        count = self._params_cnt[n][attr]
        bits = self._get_weight_nbits(n, attr, bitwidth_mode, qc)
        return Utilization(count, count * bits / 8)

    per_attr = {attr: _attr_utilization(attr) for attr in target_attrs}
    node_total: Utilization = sum(per_attr.values())  # type: ignore
    return node_total, per_attr
259
+
260
def compute_activations_utilization(self,
                                    target_criterion: TargetInclusionCriterion,
                                    bitwidth_mode: BitwidthMode,
                                    act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None):
    """
    Compute the total activations utilization of the graph.

    Args:
        target_criterion: criterion to include weights for computation.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.

    Returns:
        Total activation utilization of the network.
    """
    # The total is the first item of the per-cut computation result; the detailed breakdowns are dropped.
    by_cut_result = self.compute_activation_utilization_by_cut(target_criterion, bitwidth_mode, act_qcs)
    return by_cut_result[0]
278
+
279
def compute_activation_utilization_by_cut(self,
                                          target_criterion: TargetInclusionCriterion,
                                          bitwidth_mode: BitwidthMode,
                                          act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
        -> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
    """
    Compute graph activation cuts utilization.

    Args:
        target_criterion: criterion to include weights for computation.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.

    Returns:
        - Total activation utilization of the network (the bytes of the maximal cut, 0 if no cut contains
          a target node).
        - Total activation utilization per cut.
        - Detailed activation utilization per cut per node.

    Raises:
        NotImplementedError: if target_criterion is not TargetInclusionCriterion.AnyQuantized.
    """
    if target_criterion != TargetInclusionCriterion.AnyQuantized:  # pragma: no cover
        raise NotImplementedError('Computing MaxCut activation utilization is currently only supported for quantized targets.')

    graph_target_nodes = self._get_target_activation_nodes(target_criterion, include_reused=True)
    # if there are no target activations in the graph, don't waste time looking for cuts
    if not graph_target_nodes:
        return 0, {}, {}

    util_per_cut: Dict[Cut, Utilization] = {}  # type: ignore
    util_per_cut_per_node = defaultdict(dict)
    for cut in self.cuts:
        cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
        if not cut_target_nodes:
            # cuts without any target node are not reported
            continue
        for n in cut_target_nodes:
            qc = act_qcs.get(n) if act_qcs else None
            util_per_cut_per_node[cut][n] = self.compute_node_activation_tensor_utilization(n, target_criterion,
                                                                                            bitwidth_mode, qc)
        util_per_cut[cut] = sum(util_per_cut_per_node[cut].values())  # type: ignore

    # max() raises ValueError on an empty iterable, so handle the case where every cut was skipped.
    if not util_per_cut:
        return 0, util_per_cut, util_per_cut_per_node

    # Utilization objects are compared by bytes, so this picks the cut with the largest memory.
    total_util = max(util_per_cut.values())
    return total_util.bytes, util_per_cut, util_per_cut_per_node
321
+
322
def compute_activation_tensors_utilization(self,
                                           target_criterion: TargetInclusionCriterion,
                                           bitwidth_mode: BitwidthMode,
                                           act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                                           include_reused=False) \
        -> Tuple[float, Dict[BaseNode, Utilization]]:  # pragma: no cover
    """
    Compute the resource utilization of the activation tensors of the graph.

    Args:
        target_criterion: criterion to include weights for computation.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.
        include_reused: whether to include reused nodes.

    Returns:
        - Total activation utilization of the network.
        - Detailed utilization per node. Dict keys are nodes in a topological order.
    """
    target_nodes = self._get_target_activation_nodes(target_criterion, include_reused=include_reused)
    if not target_nodes:
        return 0, {}

    # The nodes were already selected by the criterion, so the per-node check is skipped by passing None.
    per_node: Dict[BaseNode, Utilization] = {
        node: self.compute_node_activation_tensor_utilization(node, None, bitwidth_mode,
                                                              act_qcs.get(node) if act_qcs else None)
        for node in self._topo_sort(target_nodes)
    }

    # Utilization objects are compared by bytes, so this is the tensor with the largest memory.
    largest = max(per_node.values())
    return largest.bytes, per_node
355
+
356
def compute_node_activation_tensor_utilization(self,
                                               n: BaseNode,
                                               target_criterion: Optional[TargetInclusionCriterion],
                                               bitwidth_mode: BitwidthMode,
                                               qc: Optional[NodeActivationQuantizationConfig]) -> Utilization:
    """
    Compute the activation resource utilization of a single node.

    Args:
        n: node.
        target_criterion: criterion to include nodes for computation. If None, will skip the check.
        bitwidth_mode: bit-width mode for the computation.
        qc: activation quantization config for the node. Should be provided only in custom bit mode.
          In custom mode, must be provided if the activation is configurable. For non-configurable activation, if
          not passed, the default configuration will be extracted from the node.

    Returns:
        Node's activation utilization.
    """
    # When a criterion is given, a node that does not satisfy it contributes nothing.
    if target_criterion and not self._get_target_activation_nodes(target_criterion=target_criterion,
                                                                  include_reused=True, nodes=[n]):
        return Utilization(0, 0)  # pragma: no cover

    # cached output size of the node times its bit-width, converted from bits to bytes
    tensor_size = self._act_tensors_size[n]
    bits = self._get_activation_nbits(n, bitwidth_mode, qc)
    return Utilization(tensor_size, tensor_size * bits / 8)
383
+
384
def compute_bops(self,
                 target_criterion: TargetInclusionCriterion,
                 bitwidth_mode: BitwidthMode,
                 act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                 w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None) \
        -> Tuple[int, Dict[BaseNode, int]]:
    """
    Compute bit operations for nodes that have a kernel.
    Note that 'target_criterion' applies to weights, and BOPS are computed for the selected nodes regardless
    of the input activation quantization or lack thereof.

    Args:
        target_criterion: criterion to include nodes for computation.
        bitwidth_mode: bit-width mode for computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.
        w_qcs: custom weights quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable weights. For non-configurable
          weights, if not provided, the default configuration will be extracted from the node.

    Returns:
        - Total BOPS count of the network.
        - Detailed BOPS count per node.
    """
    if target_criterion != TargetInclusionCriterion.AnyQuantized:  # pragma: no cover
        raise NotImplementedError('BOPS computation is currently only supported for quantized targets.')

    # filter out nodes with only positional weights # TODO add as arg to get target nodes
    kernel_nodes = [node for node in self._get_target_weight_nodes(target_criterion, include_reused=True)
                    if node.has_kernel_weight_to_quantize(self.fw_info)]

    bops_per_node = {
        node: self.compute_node_bops(node, bitwidth_mode, act_qcs=act_qcs,
                                     w_qc=w_qcs.get(node) if w_qcs else None)
        for node in kernel_nodes
    }
    return sum(bops_per_node.values()), bops_per_node
422
+
423
def compute_node_bops(self,
                      n: BaseNode,
                      bitwidth_mode: BitwidthMode,
                      act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                      w_qc: Optional[NodeWeightsQuantizationConfig] = None) -> Union[float, int]:
    """
    Compute the Bit Operations count of a single node.

    Args:
        n: node.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.
        w_qc: weights quantization config for the node. Should be provided only in custom bit mode.
          Must provide configuration for all configurable weights. For non-configurable weights, will use the
          provided configuration if found, or extract the default configuration from the node otherwise.

    Returns:
        Node's BOPS count.
    """
    mac_count = self.fw_impl.get_node_mac_operations(n, self.fw_info)
    if mac_count == 0:  # pragma: no cover
        return mac_count

    in_edges = self.graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
    # TODO temporary adding this for const_representation test in torch which has Linear with const input
    if not in_edges:
        return 0
    assert len(in_edges) == 1, \
        f'Unexpected number of inputs {len(in_edges)} for BOPS calculation. Expected 1.'

    # activation bit-width is taken from the source node of the single incoming edge
    src_node = in_edges[0].source_node
    src_act_qc = act_qcs.get(src_node) if act_qcs else None
    act_bits = self._get_activation_nbits(src_node, bitwidth_mode, src_act_qc)

    # weights bit-width is taken from the node's single kernel attribute
    kernel_attrs = self.fw_info.get_kernel_op_attributes(n.type)
    if len(kernel_attrs) > 1:  # pragma: no cover
        raise NotImplementedError('Multiple kernel attributes are not supported for BOPS computation.')
    weight_bits = self._get_weight_nbits(n, kernel_attrs[0], bitwidth_mode, w_qc)

    return act_bits * weight_bits * mac_count
466
+
467
def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]:
    """
    Select the nodes of a given cut that match the inclusion criterion.

    The cut's nodes are looked up in 'self.cuts' and filtered by the activation target selection,
    always including reused nodes.

    Args:
        cut: a graph cut.
        target_criterion: criterion to include nodes for computation.

    Returns:
        A list of target nodes from the cut.
    """
    return self._get_target_activation_nodes(target_criterion,
                                             include_reused=True,
                                             nodes=self.cuts[cut])
480
+
481
def _get_target_weight_nodes(self,
                             target_criterion: TargetInclusionCriterion,
                             include_reused: bool) -> List[BaseNode]:
    """
    Select the graph nodes that take part in weights utilization computation.

    Args:
        target_criterion: criterion to include weights for computation.
        include_reused: whether to include reused nodes.

    Returns:
        Target nodes.

    Raises:
        ValueError: if the criterion is not recognized.
    """
    def _quantized_nodes():
        # Nodes with at least one weight attribute that should be quantized.
        return [node for node in self.graph if node.has_any_weight_attr_to_quantize()]

    def _configurable_nodes():
        return self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused)

    if target_criterion == TargetInclusionCriterion.QConfigurable:
        selected = _configurable_nodes()
    elif target_criterion == TargetInclusionCriterion.AnyQuantized:
        selected = _quantized_nodes()
    elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
        # TODO this is wrong. Need to look at specific weights and not the whole node (if w1 is configurable and w2
        #  is non-configurable we want to discover the node both as configurable and non-configurable)
        quantized = _quantized_nodes()
        configurable = _configurable_nodes()
        selected = [node for node in quantized if node not in configurable]
    elif target_criterion == TargetInclusionCriterion.Any:  # pragma: no cover
        selected = list(self.graph.nodes)
    else:  # pragma: no cover
        raise ValueError(f'Unknown {target_criterion}.')

    if include_reused:
        return selected
    return [node for node in selected if not node.reuse]
512
+
513
def _get_target_weight_attrs(self, n: BaseNode, target_criterion: TargetInclusionCriterion) -> List[str]:
    """
    Select the weight attributes of a node that match the inclusion criterion.

    Args:
        n: node.
        target_criterion: selection criterion.

    Returns:
        Selected weight attributes names. For the 'Any' criterion, all weight attributes of the node.

    Raises:
        ValueError: if the criterion is not recognized.
    """
    all_attrs = n.get_node_weights_attributes()

    if target_criterion == TargetInclusionCriterion.QConfigurable:
        return [a for a in all_attrs if n.is_configurable_weight(a)]

    if target_criterion == TargetInclusionCriterion.AnyQuantized:
        return [a for a in all_attrs if n.is_weights_quantization_enabled(a)]

    if target_criterion == TargetInclusionCriterion.QNonConfigurable:
        # Quantized attributes that are not configurable.
        quantized_attrs = [a for a in all_attrs if n.is_weights_quantization_enabled(a)]
        configurable_attrs = [a for a in all_attrs if n.is_configurable_weight(a)]
        return [a for a in quantized_attrs if a not in configurable_attrs]

    if target_criterion != TargetInclusionCriterion.Any:  # pragma: no cover
        raise ValueError(f'Unknown {target_criterion}')

    return all_attrs
536
+
537
def _topo_sort(self, nodes: Sequence[BaseNode]) -> List[BaseNode]:
    """
    Order the given nodes according to the graph's topological order.

    Args:
        nodes: nodes to sort.

    Returns:
        Nodes in topological order.

    Raises:
        ValueError: if the number of nodes found in the graph differs from the number of requested nodes.
    """
    ordered = list(filter(lambda node: node in nodes, self.graph.get_topo_sorted_nodes()))
    if len(ordered) == len(nodes):
        return ordered

    # pragma: no cover
    missing_nodes = [node for node in nodes if node not in ordered]
    raise ValueError(f'Could not topo-sort, nodes {missing_nodes} do not match the graph nodes.')
553
+
554
def _get_target_activation_nodes(self,
                                 target_criterion: TargetInclusionCriterion,
                                 include_reused: bool,
                                 nodes: Optional[List[BaseNode]] = None) -> List[BaseNode]:
    """
    Collect nodes to include in activation utilization computation.

    Args:
        target_criterion: criterion to include activations for computation.
        include_reused: whether to include reused nodes.
        nodes: nodes to filter target nodes from. If None, uses the graph nodes. An explicitly passed
          empty list is respected and yields an empty result.

    Returns:
        Selected nodes.

    Raises:
        ValueError: if the criterion is not recognized.
    """
    # Fall back to the graph nodes only when no candidates were passed at all. A truthiness check
    # ('nodes or ...') would also replace an explicitly passed empty list with the whole graph.
    if nodes is None:
        nodes = self.graph.nodes

    if target_criterion == TargetInclusionCriterion.QConfigurable:  # pragma: no cover
        nodes = [n for n in nodes if n.has_configurable_activation()]
    elif target_criterion == TargetInclusionCriterion.AnyQuantized:
        nodes = [n for n in nodes if n.is_activation_quantization_enabled()]
    elif target_criterion == TargetInclusionCriterion.QNonConfigurable:  # pragma: no cover
        nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
    elif target_criterion != TargetInclusionCriterion.Any:  # pragma: no cover
        raise ValueError(f'Unknown {target_criterion}.')

    if not include_reused:  # pragma: no cover
        nodes = [n for n in nodes if not n.reuse]
    return nodes
581
+
582
@classmethod
def _get_activation_nbits(cls,
                          n: BaseNode,
                          bitwidth_mode: BitwidthMode,
                          act_qc: Optional[NodeActivationQuantizationConfig]) -> int:
    """
    Resolve the activation bit-width of a node for the requested bit-width mode.

    Args:
        n: node.
        bitwidth_mode: bit-width mode for computation.
        act_qc: activation quantization config for the node. Should be provided only in custom bit mode.
          When provided it takes precedence over the node's own candidates. If not passed, the bit-width
          is resolved from the node.

    Returns:
        Activation bit-width. FLOAT_BITWIDTH is returned for float mode or when activation quantization
        is disabled (in the passed config, or in the node if no config was passed).

    Raises:
        ValueError: if a config is passed in a non-custom mode, if the node does not have exactly one unique
          activation candidate when a single one is required, or if the mode is not recognized.
    """
    # An explicitly passed config wins over anything stored on the node.
    if act_qc:
        if bitwidth_mode != BitwidthMode.QCustom:  # pragma: no cover
            raise ValueError(f'Activation config is not expected for non-custom bit mode {bitwidth_mode}')
        if act_qc.enable_activation_quantization:
            return act_qc.activation_n_bits
        return FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Float:
        return FLOAT_BITWIDTH
    if not n.is_activation_quantization_enabled():
        return FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Q8Bit:
        return 8

    # Modes registered in the class-level mapping are resolved by applying the mapped function
    # to the bit-widths of all the node's candidates.
    if bitwidth_mode in cls._bitwidth_mode_fn:
        nbits_per_candidate = [candidate.activation_quantization_cfg.activation_n_bits
                               for candidate in n.candidates_quantization_cfg]
        return cls._bitwidth_mode_fn[bitwidth_mode](nbits_per_candidate)

    if bitwidth_mode in (BitwidthMode.QCustom, BitwidthMode.QDefaultSP):
        unique_candidates = n.get_unique_activation_candidates()
        if len(unique_candidates) != 1:  # pragma: no cover
            raise ValueError(f'Could not retrieve the activation quantization candidate for node {n.name} '
                             f'as it has {len(unique_candidates)}!=1 unique candidates .')
        return unique_candidates[0].activation_quantization_cfg.activation_n_bits

    raise ValueError(f'Unknown mode {bitwidth_mode}')  # pragma: no cover
623
+
624
@classmethod
def _get_weight_nbits(cls,
                      n: BaseNode,
                      w_attr: str,
                      bitwidth_mode: BitwidthMode,
                      w_qc: Optional[NodeWeightsQuantizationConfig]) -> int:
    """
    Get the bit-width of a specific weight of a node according to the requested bit-width mode.

    Args:
        n: node.
        w_attr: weight attribute.
        bitwidth_mode: bit-width mode for the computation.
        w_qc: weights quantization config for the node. Should be provided only in custom bit mode.
          Must provide configuration for all configurable weights. For non-configurable weights, will use the
          provided configuration if found, or extract the default configuration from the node otherwise.

    Returns:
        Weight bit-width. FLOAT_BITWIDTH is returned for float mode or when quantization of the attribute
        is disabled (in the passed config if it covers the attribute, otherwise in the node).

    Raises:
        ValueError: if a config covering the attribute is passed in a non-custom mode, if the attribute does
          not have exactly one unique candidate when a single one is required, or if the mode is not recognized.
    """
    # A passed config that covers this attribute takes precedence over the node's candidates.
    if w_qc and w_qc.has_attribute_config(w_attr):
        if bitwidth_mode != BitwidthMode.QCustom:  # pragma: no cover
            # f-string so that the offending mode actually appears in the message.
            raise ValueError(f'Weight config is not expected for non-custom bit mode {bitwidth_mode}')
        attr_cfg = w_qc.get_attr_config(w_attr)
        return attr_cfg.weights_n_bits if attr_cfg.enable_weights_quantization else FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Float or not n.is_weights_quantization_enabled(w_attr):
        return FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Q8Bit:
        return 8

    node_qcs = n.get_unique_weights_candidates(w_attr)
    w_qcs = [qc.weights_quantization_cfg.get_attr_config(w_attr) for qc in node_qcs]
    # Modes registered in the class-level mapping are resolved by applying the mapped function
    # to the bit-widths of the attribute's unique candidates.
    if bitwidth_mode in cls._bitwidth_mode_fn:
        return cls._bitwidth_mode_fn[bitwidth_mode]([qc.weights_n_bits for qc in w_qcs])

    if bitwidth_mode in [BitwidthMode.QCustom, BitwidthMode.QDefaultSP]:
        # if configuration was not passed and the weight has only one candidate, use it
        if len(w_qcs) != 1:  # pragma: no cover
            raise ValueError(f'Could not retrieve the quantization candidate for attr {w_attr} of node {n.name} '
                             f'as it {len(w_qcs)}!=1 unique candidates.')
        return w_qcs[0].weights_n_bits

    raise ValueError(f'Unknown mode {bitwidth_mode.name}')  # pragma: no cover