mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +102 -104
- model_compression_toolkit/__init__.py +2 -2
- model_compression_toolkit/core/common/framework_info.py +1 -3
- model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
- model_compression_toolkit/core/common/graph/base_graph.py +20 -21
- model_compression_toolkit/core/common/graph/base_node.py +44 -17
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +187 -0
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +35 -162
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +668 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +74 -51
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/pruner.py +5 -3
- model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
- model_compression_toolkit/core/graph_prep_runner.py +12 -11
- model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
- model_compression_toolkit/core/runner.py +33 -60
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/metadata.py +11 -10
- model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
- model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
- model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
- model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
- model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -528
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
@@ -0,0 +1,668 @@
|
|
1
|
+
# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
from collections import defaultdict
|
16
|
+
from copy import deepcopy
|
17
|
+
from enum import Enum, auto
|
18
|
+
from functools import lru_cache
|
19
|
+
from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence
|
20
|
+
|
21
|
+
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
22
|
+
from model_compression_toolkit.core import FrameworkInfo
|
23
|
+
from model_compression_toolkit.core.common import Graph, BaseNode
|
24
|
+
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
25
|
+
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
|
26
|
+
from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut
|
27
|
+
from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
|
28
|
+
from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
|
29
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
|
30
|
+
RUTarget, ResourceUtilization
|
31
|
+
from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
|
32
|
+
NodeActivationQuantizationConfig
|
33
|
+
|
34
|
+
|
35
|
+
class BitwidthMode(Enum):
    """
    Selects which bit-widths are assumed when resource utilization is computed.

    Float: the original, un-quantized configuration. Tensors are assumed to be 32-bit floats.
    Q8Bit: 8-bit configuration. NOTE(review): this member was not described originally; presumably every
      quantized target is counted at 8 bits - confirm against the bit-width resolution code.
    QMaxBit: maximal bit-width configuration. Every node is assigned the maximal precision available to it
      according to the target platform capabilities.
    QMinBit: minimal bit-width configuration. Every node is assigned the minimal precision available to it
      according to the target platform capabilities.
    QCustom: the bit-width configuration is provided explicitly by the caller.
    QDefaultSP: default single-precision bit-width configuration. Can be used in a single-precision mode, or
      together with TargetInclusionCriterion.QNonConfigurable, in which case the utilization is computed
      only for single-precision nodes. For a custom single-precision configuration use QCustom instead.
    """
    # Declaration order determines the auto() values, so it must not be changed.
    Float = auto()
    Q8Bit = auto()
    QMaxBit = auto()
    QMinBit = auto()
    QCustom = auto()
    QDefaultSP = auto()
|
55
|
+
|
56
|
+
|
57
|
+
class TargetInclusionCriterion(Enum):
    """
    Selects which nodes / parameters take part in a resource utilization computation.

    QConfigurable: targets that are configurable for Mixed Precision (multiple quantization candidates).
    QNonConfigurable: targets that are not configurable (a single quantization candidate).
    AnyQuantized: every quantized target, configurable or not.
    Any: every target, quantized and float alike.
    """
    # Declaration order determines the auto() values, so it must not be changed.
    QConfigurable = auto()
    QNonConfigurable = auto()
    AnyQuantized = auto()
    Any = auto()
|
70
|
+
|
71
|
+
|
72
|
+
class Utilization(NamedTuple):
    """
    Utility container for a single resource utilization result.

    Supports sum, max, min over an iterable of Utilization objects: addition is performed field by field,
    while the '>' and '<' comparisons look at the bytes field only.

    Args:
        size: parameters or activation tensor(s) size.
        bytes: memory utilization. Annotated as optional, but addition and comparison operate on it
          directly, so both operands must carry a number for those operations.
    """
    size: int
    bytes: Optional[float]

    def __add__(self, other: Union['Utilization', Literal[0]]) -> 'Utilization':
        # 0 is the additive identity, so that 'u + 0' behaves the same as '0 + u' (see __radd__).
        if other == 0:
            return self
        return Utilization(self.size + other.size, self.bytes + other.bytes)

    def __radd__(self, other: Union['Utilization', Literal[0]]):
        # Needed for sum (with default start_value=0).
        if other == 0:
            return self
        return self + other    # pragma: no cover

    def __gt__(self, other: 'Utilization'):
        # Needed for max. Compare by bytes.
        return self.bytes > other.bytes

    def __lt__(self, other: 'Utilization'):
        # Needed for min. Compare by bytes.
        return self.bytes < other.bytes    # pragma: no cover
|
100
|
+
|
101
|
+
|
102
|
+
class ResourceUtilizationCalculator:
|
103
|
+
""" Resource utilization calculator. """
|
104
|
+
|
105
|
+
_bitwidth_mode_fn = {
|
106
|
+
BitwidthMode.QMaxBit: max,
|
107
|
+
BitwidthMode.QMinBit: min,
|
108
|
+
}
|
109
|
+
|
110
|
+
def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo):
    """
    Keep references to the graph and framework objects and pre-compute per-node size caches.

    Args:
        graph: graph whose nodes are scanned to fill the caches.
        fw_impl: framework implementation object, stored as-is.
        fw_info: framework info object, stored as-is.
    """
    self.graph = graph
    self.fw_impl = fw_impl
    self.fw_info = fw_info

    # The caches cover the whole graph up front, even though utilization may end up being requested for
    # a subset of the nodes only. Filling them lazily is possible, but the saving is probably negligible.
    self._act_tensors_size, self._params_cnt = {}, {}
    for node in graph.nodes:
        self._act_tensors_size[node] = node.get_total_output_params()
        self._params_cnt[node] = {attr: weight.size for attr, weight in node.weights.items()}

    # Computed on first access of the 'cuts' property.
    self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None
|
123
|
+
|
124
|
+
@property
def cuts(self) -> Dict[Cut, List[BaseNode]]:
    """ Graph cuts mapped to their memory element nodes. Computed on first access and cached afterwards. """
    if self._cuts is not None:
        return self._cuts

    # The memory graph is built from a copy of the graph.
    mem_graph = MemoryGraph(deepcopy(self.graph))
    _, _, found_cuts = compute_graph_max_cut(mem_graph)
    if found_cuts is None:  # pragma: no cover
        raise RuntimeError("Failed to calculate activation memory cuts for graph.")  # pragma: no cover

    # Nodes of every non-empty cut are cached without filtering by target, so that the cache can be
    # reused later regardless of the requested criterion.
    nodes_per_cut = {}
    for cut in found_cuts:
        elements = cut.mem_elements.elements
        if not elements:
            continue
        nodes_per_cut[cut] = [self.graph.find_node_by_name(elem.node_name)[0] for elem in elements]

    self._cuts = nodes_per_cut
    return self._cuts
|
137
|
+
|
138
|
+
def compute_resource_utilization(self,
                                 target_criterion: TargetInclusionCriterion,
                                 bitwidth_mode: BitwidthMode,
                                 act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                                 w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None,
                                 ru_targets: Optional[Iterable[RUTarget]] = None) -> ResourceUtilization:
    """
    Compute network's resource utilization.

    Args:
        target_criterion: criterion to include targets for computation (applies to weights, activation).
        bitwidth_mode: bit-width mode for computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.
        w_qcs: custom weights quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable weights. For non-configurable
          weights, if not provided, the default configuration will be extracted from the node.
        ru_targets: metrics to include for computation. If None or empty, all metrics are calculated.

    Returns:
        Resource utilization object.

    Raises:
        ValueError: if a weights / activation configuration is passed but none of the requested metrics
          makes use of it.
    """
    # Materialize before testing for emptiness: a generator is always truthy, so checking the raw argument
    # would let an empty generator through and no metric would be computed.
    ru_targets = set(ru_targets) if ru_targets is not None else set()
    if not ru_targets:
        ru_targets = set(RUTarget)

    need_weights = bool({RUTarget.WEIGHTS, RUTarget.TOTAL}.intersection(ru_targets))
    need_activation = bool({RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets))
    need_bops = RUTarget.BOPS in ru_targets

    # BOPS computation receives both the weights and the activation configurations, so a BOPS request makes
    # either custom configuration relevant.
    if w_qcs is not None and not (need_weights or need_bops):  # pragma: no cover
        raise ValueError('Weight configuration passed but no relevant metric requested.')
    if act_qcs is not None and not (need_activation or need_bops):  # pragma: no cover
        raise ValueError('Activation configuration passed but no relevant metric requested.')

    w_total, a_total = None, None
    if need_weights:
        w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs)
    if need_activation:
        a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)

    ru = ResourceUtilization()
    if RUTarget.WEIGHTS in ru_targets:
        ru.weights_memory = w_total
    if RUTarget.ACTIVATION in ru_targets:
        ru.activation_memory = a_total
    if RUTarget.TOTAL in ru_targets:
        ru.total_memory = w_total + a_total
    if need_bops:
        ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
                                       bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)

    # Internal sanity check that exactly the requested metrics were filled in.
    assert ru.get_restricted_metrics() == ru_targets, 'Mismatch between the number of requested and computed metrics'
    return ru
|
187
|
+
|
188
|
+
def compute_weights_utilization(self,
                                target_criterion: TargetInclusionCriterion,
                                bitwidth_mode: BitwidthMode,
                                w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None) \
        -> Tuple[float, Dict[BaseNode, Utilization], Dict[BaseNode, Dict[str, Utilization]]]:
    """
    Compute the weights resource utilization of the graph.

    Reused nodes are excluded when the target nodes are collected.

    Args:
        target_criterion: criterion that selects which targets are included.
        bitwidth_mode: bit-width mode for the computation.
        w_qcs: custom weights quantization configuration, for custom bit mode only. In custom mode a
          configuration must be given for every configurable weight. For non-configurable weights without
          a configuration, the default one is extracted from the node.

    Returns:
        - Total weights utilization of the network (0 if there are no target nodes).
        - Total weights utilization per node. Dict keys are nodes in a topological order.
        - Utilization per node per weight attribute. Dict keys are nodes in a topological order.
    """
    target_nodes = self._get_target_weight_nodes(target_criterion, include_reused=False)
    if not target_nodes:
        return 0, {}, {}

    # An absent or empty custom configuration resolves every per-node lookup to None.
    custom_qcs = w_qcs or {}

    per_node: Dict[BaseNode, Utilization] = {}
    per_node_per_attr = {}
    for node in self._topo_sort(target_nodes):
        per_node[node], per_node_per_attr[node] = self.compute_node_weights_utilization(node,
                                                                                        target_criterion,
                                                                                        bitwidth_mode,
                                                                                        custom_qcs.get(node))

    network_total = sum(per_node.values())
    return network_total.bytes, per_node, per_node_per_attr
|
224
|
+
|
225
|
+
def compute_node_weights_utilization(self,
                                     n: BaseNode,
                                     target_criterion: TargetInclusionCriterion,
                                     bitwidth_mode: BitwidthMode,
                                     qc: Optional[NodeWeightsQuantizationConfig])\
        -> Tuple[Utilization, Dict[str, Utilization]]:
    """
    Compute resource utilization for weights of a node.

    Args:
        n: node.
        target_criterion: criterion to include weights for computation.
        bitwidth_mode: bit-width mode for the computation.
        qc: custom weights quantization configuration, or None. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable weights. For non-configurable
          weights, if not provided, the default configuration will be extracted from the node.

    Returns:
        - Node's total weights utilization.
        - Detailed per weight attribute utilization.
    """
    weight_attrs = self._get_target_weight_attrs(n, target_criterion)
    if not weight_attrs:  # pragma: no cover
        return Utilization(0, 0), {}

    attr_util = {}
    for attr in weight_attrs:
        size = self._params_cnt[n][attr]
        nbits = self._get_weight_nbits(n, attr, bitwidth_mode, qc)
        # nbits is a width in bits, hence the division by 8 to get bytes.
        bytes_ = size * nbits / 8
        attr_util[attr] = Utilization(size, bytes_)

    total_weights: Utilization = sum(attr_util.values())    # type: ignore
    return total_weights, attr_util
|
259
|
+
|
260
|
+
def compute_activations_utilization(self,
                                    target_criterion: TargetInclusionCriterion,
                                    bitwidth_mode: BitwidthMode,
                                    act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None):
    """
    Compute the total activations utilization of the graph.

    Delegates to the per-cut computation and keeps only the network total.

    Args:
        target_criterion: criterion that selects which activations are included.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration, for custom bit mode only. In custom mode a
          configuration must be given for every configurable activation. For non-configurable activations
          without a configuration, the default one is extracted from the node.

    Returns:
        Total activation utilization of the network.
    """
    total, *_ = self.compute_activation_utilization_by_cut(target_criterion, bitwidth_mode, act_qcs)
    return total
|
278
|
+
|
279
|
+
def compute_activation_utilization_by_cut(self,
                                          target_criterion: TargetInclusionCriterion,
                                          bitwidth_mode: BitwidthMode,
                                          act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
        -> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
    """
    Compute graph activation cuts utilization.

    Args:
        target_criterion: criterion to include activations for computation. Only
          TargetInclusionCriterion.AnyQuantized is currently supported.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration. Should be provided for custom bit mode only.
          In custom mode, must provide configuration for all configurable activations. For non-configurable
          activations, if not provided, the default configuration will be extracted from the node.

    Returns:
        - Total activation utilization of the network: the bytes of the cut with the largest utilization,
          or 0 if there is nothing to compute.
        - Total activation utilization per cut.
        - Detailed activation utilization per cut per node.

    Raises:
        NotImplementedError: for any criterion other than TargetInclusionCriterion.AnyQuantized.
    """
    if target_criterion != TargetInclusionCriterion.AnyQuantized:  # pragma: no cover
        raise NotImplementedError('Computing MaxCut activation utilization is currently only supported for quantized targets.')

    graph_target_nodes = self._get_target_activation_nodes(target_criterion, include_reused=True)
    # if there are no target activations in the graph, don't waste time looking for cuts
    if not graph_target_nodes:
        return 0, {}, {}

    util_per_cut: Dict[Cut, Utilization] = {}    # type: ignore
    util_per_cut_per_node = defaultdict(dict)
    for cut in self.cuts:
        cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
        if not cut_target_nodes:
            # Cuts without target nodes get no entry in either result dict.
            continue
        for n in cut_target_nodes:
            qc = act_qcs.get(n) if act_qcs else None
            util_per_cut_per_node[cut][n] = self.compute_node_activation_tensor_utilization(n, target_criterion,
                                                                                            bitwidth_mode, qc)
        util_per_cut[cut] = sum(util_per_cut_per_node[cut].values())    # type: ignore

    # max() raises on an empty sequence, so handle the case where no cut contains a target node the same
    # way as a graph without target activations.
    if not util_per_cut:
        return 0, {}, {}

    total_util = max(util_per_cut.values())
    return total_util.bytes, util_per_cut, util_per_cut_per_node
|
321
|
+
|
322
|
+
def compute_activation_tensors_utilization(self,
                                           target_criterion: TargetInclusionCriterion,
                                           bitwidth_mode: BitwidthMode,
                                           act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                                           include_reused=False) \
        -> Tuple[float, Dict[BaseNode, Utilization]]:  # pragma: no cover
    """
    Compute the resource utilization of the graph's activation tensors.

    Args:
        target_criterion: criterion that selects which activations are included.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration, for custom bit mode only. In custom mode a
          configuration must be given for every configurable activation. For non-configurable activations
          without a configuration, the default one is extracted from the node.
        include_reused: whether to include reused nodes.

    Returns:
        - Total activation utilization of the network: the bytes of the largest per-node utilization, or 0
          if there are no target nodes.
        - Utilization per node. Dict keys are nodes in a topological order.
    """
    target_nodes = self._get_target_activation_nodes(target_criterion, include_reused=include_reused)
    if not target_nodes:
        return 0, {}

    # An absent or empty custom configuration resolves every per-node lookup to None.
    custom_qcs = act_qcs or {}

    # The nodes were already selected by the criterion above, so the per-node criterion check is skipped
    # by passing None.
    per_node: Dict[BaseNode, Utilization] = {
        node: self.compute_node_activation_tensor_utilization(node, None, bitwidth_mode, custom_qcs.get(node))
        for node in self._topo_sort(target_nodes)
    }

    largest = max(per_node.values())
    return largest.bytes, per_node
|
355
|
+
|
356
|
+
def compute_node_activation_tensor_utilization(self,
                                               n: BaseNode,
                                               target_criterion: Optional[TargetInclusionCriterion],
                                               bitwidth_mode: BitwidthMode,
                                               qc: Optional[NodeActivationQuantizationConfig]) -> Utilization:
    """
    Compute the activation resource utilization of a single node.

    Args:
        n: node.
        target_criterion: criterion that decides whether the node is included. If None, the check is
          skipped.
        bitwidth_mode: bit-width mode for the computation.
        qc: activation quantization config of the node, for custom bit mode only. In custom mode it must be
          given if the activation is configurable. For a non-configurable activation without a config, the
          default configuration is extracted from the node.

    Returns:
        Node's activation utilization. A zero utilization if the node does not meet the criterion.
    """
    if target_criterion and not self._get_target_activation_nodes(target_criterion=target_criterion,
                                                                  include_reused=True,
                                                                  nodes=[n]):  # pragma: no cover
        return Utilization(0, 0)

    tensor_size = self._act_tensors_size[n]
    nbits = self._get_activation_nbits(n, bitwidth_mode, qc)
    # nbits is a width in bits, hence the division by 8 to get bytes.
    return Utilization(tensor_size, tensor_size * nbits / 8)
|
383
|
+
|
384
|
+
def compute_bops(self,
                 target_criterion: TargetInclusionCriterion,
                 bitwidth_mode: BitwidthMode,
                 act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                 w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]] = None) \
        -> Tuple[int, Dict[BaseNode, int]]:
    """
    Compute bit operations over the nodes that have a kernel.

    'target_criterion' is applied to weights. BOPS are computed for the selected nodes whether or not
    their input activation is quantized. Reused nodes are included.

    Args:
        target_criterion: criterion that selects which nodes are included. Only
          TargetInclusionCriterion.AnyQuantized is currently supported.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration, for custom bit mode only. In custom mode a
          configuration must be given for every configurable activation. For non-configurable activations
          without a configuration, the default one is extracted from the node.
        w_qcs: custom weights quantization configuration, for custom bit mode only. In custom mode a
          configuration must be given for every configurable weight. For non-configurable weights without
          a configuration, the default one is extracted from the node.

    Returns:
        - Total BOPS count of the network.
        - BOPS count per node.
    """
    if target_criterion != TargetInclusionCriterion.AnyQuantized:  # pragma: no cover
        raise NotImplementedError('BOPS computation is currently only supported for quantized targets.')

    # An absent or empty custom configuration resolves every per-node lookup to None.
    custom_w_qcs = w_qcs or {}

    # filter out nodes with only positional weights # TODO add as arg to get target nodes
    kernel_nodes = [node for node in self._get_target_weight_nodes(target_criterion, include_reused=True)
                    if node.has_kernel_weight_to_quantize(self.fw_info)]

    bops_per_node = {
        node: self.compute_node_bops(node, bitwidth_mode, act_qcs=act_qcs, w_qc=custom_w_qcs.get(node))
        for node in kernel_nodes
    }
    return sum(bops_per_node.values()), bops_per_node
|
422
|
+
|
423
|
+
def compute_node_bops(self,
                      n: BaseNode,
                      bitwidth_mode: BitwidthMode,
                      act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                      w_qc: Optional[NodeWeightsQuantizationConfig] = None) -> Union[float, int]:
    """
    Compute the bit-operations (BOPS) count of a single node.

    The count is the product of the bit-width of the node's input activation, the bit-width of the node's
    kernel weight and the node's MAC operations count as reported by the framework implementation.

    Args:
        n: node to compute BOPS for.
        bitwidth_mode: bit-width mode for the computation.
        act_qcs: custom activations quantization configuration, keyed by node. Should be provided for custom
          bit mode only. In custom mode, must provide configuration for all configurable activations. For
          non-configurable activations, if not provided, the default configuration will be extracted from
          the node.
        w_qc: weights quantization config for the node. Should be provided only in custom bit mode.
          Must provide configuration for all configurable weights. For non-configurable weights, will use the
          provided configuration if found, or extract the default configuration from the node otherwise.

    Returns:
        Node's BOPS count. If the node has no MAC operations the (zero) MAC count is returned as is, and
        a node without incoming edges yields 0.
    """
    mac_count = self.fw_impl.get_node_mac_operations(n, self.fw_info)
    if mac_count == 0:  # pragma: no cover
        return mac_count

    in_edges = self.graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
    # TODO temporary adding this for const_representation test in torch which has Linear with const input
    if not in_edges:
        return 0
    assert len(in_edges) == 1, \
        f'Unexpected number of inputs {len(in_edges)} for BOPS calculation. Expected 1.'

    # The activation bit-width is taken from the node that feeds the single input edge.
    source = in_edges[0].source_node
    source_qc = None
    if act_qcs:
        source_qc = act_qcs.get(source)
    activation_bits = self._get_activation_nbits(source, bitwidth_mode, source_qc)

    kernel_attrs = self.fw_info.get_kernel_op_attributes(n.type)
    if len(kernel_attrs) > 1:  # pragma: no cover
        raise NotImplementedError('Multiple kernel attributes are not supported for BOPS computation.')
    weight_bits = self._get_weight_nbits(n, kernel_attrs[0], bitwidth_mode, w_qc)

    return activation_bits * weight_bits * mac_count
|
466
|
+
|
467
|
+
def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]:
    """
    Select the nodes of a graph cut that match an inclusion criterion.

    The cut's nodes are looked up in the cuts mapping held by this object and filtered with the same
    logic that is used for activation target nodes, always keeping reused nodes.

    Args:
        cut: a graph cut (must be a key of the cuts mapping).
        target_criterion: criterion to include nodes for computation.

    Returns:
        A list of target nodes from the cut.
    """
    return self._get_target_activation_nodes(target_criterion,
                                             include_reused=True,
                                             nodes=self.cuts[cut])
|
480
|
+
|
481
|
+
def _get_target_weight_nodes(self,
                             target_criterion: TargetInclusionCriterion,
                             include_reused: bool) -> List[BaseNode]:
    """
    Collect the graph nodes that participate in weights utilization computation.

    Args:
        target_criterion: criterion to include weights for computation.
        include_reused: whether to include reused nodes.

    Returns:
        Target nodes.

    Raises:
        ValueError: if the criterion is not one of the known inclusion criteria.
    """
    def quantized_weight_nodes():
        # nodes that have at least one weight attribute to quantize
        return [node for node in self.graph if node.has_any_weight_attr_to_quantize()]

    def configurable_weight_nodes():
        return self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused)

    if target_criterion == TargetInclusionCriterion.QConfigurable:
        selected = configurable_weight_nodes()
    elif target_criterion == TargetInclusionCriterion.AnyQuantized:
        selected = quantized_weight_nodes()
    elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
        # TODO this is wrong. Need to look at specific weights and not the whole node (if w1 is configurable and w2
        #  is non-configurable we want to discover the node both as configurable and non-configurable)
        quantized = quantized_weight_nodes()
        configurable = configurable_weight_nodes()
        selected = [node for node in quantized if node not in configurable]
    elif target_criterion == TargetInclusionCriterion.Any:  # pragma: no cover
        selected = list(self.graph.nodes)
    else:  # pragma: no cover
        raise ValueError(f'Unknown {target_criterion}.')

    if include_reused:
        return selected
    return [node for node in selected if not node.reuse]
|
512
|
+
|
513
|
+
def _get_target_weight_attrs(self, n: BaseNode, target_criterion: TargetInclusionCriterion) -> List[str]:
    """
    Select the weight attributes of a node that match an inclusion criterion.

    Args:
        n: node whose weight attributes are inspected.
        target_criterion: selection criterion.

    Returns:
        Selected weight attributes names. For the 'Any' criterion all the node's weight attributes are
        returned unfiltered.

    Raises:
        ValueError: if the criterion is not one of the known inclusion criteria.
    """
    all_attrs = n.get_node_weights_attributes()

    if target_criterion == TargetInclusionCriterion.QConfigurable:
        return [attr for attr in all_attrs if n.is_configurable_weight(attr)]

    if target_criterion == TargetInclusionCriterion.AnyQuantized:
        return [attr for attr in all_attrs if n.is_weights_quantization_enabled(attr)]

    if target_criterion == TargetInclusionCriterion.QNonConfigurable:
        # quantized attributes that are not configurable
        quantized = [attr for attr in all_attrs if n.is_weights_quantization_enabled(attr)]
        configurable = [attr for attr in all_attrs if n.is_configurable_weight(attr)]
        return [attr for attr in quantized if attr not in configurable]

    if target_criterion == TargetInclusionCriterion.Any:
        return all_attrs

    raise ValueError(f'Unknown {target_criterion}')  # pragma: no cover
|
536
|
+
|
537
|
+
def _topo_sort(self, nodes: Sequence[BaseNode]) -> List[BaseNode]:
    """
    Order the given nodes according to the graph's topological order.

    Args:
        nodes: nodes to sort. All of them are expected to be nodes of the graph.

    Returns:
        The given nodes, in topological order.

    Raises:
        ValueError: if the number of graph nodes matched differs from the number of nodes passed.
    """
    topo_nodes = []
    # Walk the graph in topological order and keep only the requested nodes.
    for graph_node in self.graph.get_topo_sorted_nodes():
        if graph_node in nodes:
            topo_nodes.append(graph_node)

    if len(topo_nodes) == len(nodes):
        return topo_nodes

    missing_nodes = [n for n in nodes if n not in topo_nodes]  # pragma: no cover
    raise ValueError(f'Could not topo-sort, nodes {missing_nodes} do not match the graph nodes.')  # pragma: no cover
|
553
|
+
|
554
|
+
def _get_target_activation_nodes(self,
                                 target_criterion: TargetInclusionCriterion,
                                 include_reused: bool,
                                 nodes: Optional[List[BaseNode]] = None) -> List[BaseNode]:
    """
    Collect nodes to include in activation utilization computation.

    Args:
        target_criterion: criterion to include activations for computation.
        include_reused: whether to include reused nodes.
        nodes: nodes to filter target nodes from. If None (default), the graph nodes are used. An empty
          list is respected as-is and yields an empty result.

    Returns:
        Selected nodes.

    Raises:
        ValueError: if the criterion is not one of the known inclusion criteria.
    """
    # Fall back to the graph nodes only when no candidates were passed at all. A truthiness check
    # ('nodes or ...') would also replace an explicitly passed empty list with the whole graph.
    if nodes is None:
        nodes = self.graph.nodes

    if target_criterion == TargetInclusionCriterion.QConfigurable:  # pragma: no cover
        nodes = [n for n in nodes if n.has_configurable_activation()]
    elif target_criterion == TargetInclusionCriterion.AnyQuantized:
        nodes = [n for n in nodes if n.is_activation_quantization_enabled()]
    elif target_criterion == TargetInclusionCriterion.QNonConfigurable:  # pragma: no cover
        nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
    elif target_criterion != TargetInclusionCriterion.Any:  # pragma: no cover
        raise ValueError(f'Unknown {target_criterion}.')

    if not include_reused:  # pragma: no cover
        nodes = [n for n in nodes if not n.reuse]
    return nodes
|
581
|
+
|
582
|
+
@classmethod
def _get_activation_nbits(cls,
                          n: BaseNode,
                          bitwidth_mode: BitwidthMode,
                          act_qc: Optional[NodeActivationQuantizationConfig]) -> int:
    """
    Resolve the activation bit-width of a node for the requested bit-width mode.

    Args:
        n: node.
        bitwidth_mode: bit-width mode for computation.
        act_qc: activation quantization config for the node. Should be provided only in custom bit mode.
          In custom mode, must be provided if the activation is configurable. For non-configurable activation,
          if not passed, the default configuration will be extracted from the node.

    Returns:
        Activation bit-width.

    Raises:
        ValueError: if a config is passed in a non-custom mode, if a single candidate is required but the
          node does not have exactly one unique activation candidate, or if the mode is unknown.
    """
    # An explicitly passed configuration takes precedence, but is only legal in custom mode.
    if act_qc:
        if bitwidth_mode != BitwidthMode.QCustom:  # pragma: no cover
            raise ValueError(f'Activation config is not expected for non-custom bit mode {bitwidth_mode}')
        if act_qc.enable_activation_quantization:
            return act_qc.activation_n_bits
        return FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Float:
        return FLOAT_BITWIDTH
    if not n.is_activation_quantization_enabled():
        return FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Q8Bit:
        return 8

    # Modes registered in the class mapping reduce the bit-widths of all the node's candidates to one value.
    if bitwidth_mode in cls._bitwidth_mode_fn:
        nbits_per_candidate = [c.activation_quantization_cfg.activation_n_bits for c in n.candidates_quantization_cfg]
        return cls._bitwidth_mode_fn[bitwidth_mode](nbits_per_candidate)

    # No config was passed: the node must have a single unique candidate to take the bit-width from.
    if bitwidth_mode in (BitwidthMode.QCustom, BitwidthMode.QDefaultSP):
        qcs = n.get_unique_activation_candidates()
        if len(qcs) != 1:  # pragma: no cover
            raise ValueError(f'Could not retrieve the activation quantization candidate for node {n.name} '
                             f'as it has {len(qcs)}!=1 unique candidates .')
        return qcs[0].activation_quantization_cfg.activation_n_bits

    raise ValueError(f'Unknown mode {bitwidth_mode}')  # pragma: no cover
|
623
|
+
|
624
|
+
@classmethod
def _get_weight_nbits(cls,
                      n: BaseNode,
                      w_attr: str,
                      bitwidth_mode: BitwidthMode,
                      w_qc: Optional[NodeWeightsQuantizationConfig]) -> int:
    """
    Get the bit-width of a specific weight of a node according to the requested bit-width mode.

    Args:
        n: node.
        w_attr: weight attribute.
        bitwidth_mode: bit-width mode for the computation.
        w_qc: weights quantization config for the node. Should be provided only in custom bit mode.
          Must provide configuration for all configurable weights. For non-configurable weights, will use the
          provided configuration if found, or extract the default configuration from the node otherwise.

    Returns:
        Weight bit-width.

    Raises:
        ValueError: if a config for the attribute is passed in a non-custom mode, if a single candidate is
          required but the attribute does not have exactly one unique candidate, or if the mode is unknown.
    """
    # A passed configuration that covers this attribute takes precedence, but is only legal in custom mode.
    if w_qc and w_qc.has_attribute_config(w_attr):
        if bitwidth_mode != BitwidthMode.QCustom:  # pragma: no cover
            # f-string so that the offending mode is actually reported in the message
            raise ValueError(f'Weight config is not expected for non-custom bit mode {bitwidth_mode}')
        attr_cfg = w_qc.get_attr_config(w_attr)
        return attr_cfg.weights_n_bits if attr_cfg.enable_weights_quantization else FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Float or not n.is_weights_quantization_enabled(w_attr):
        return FLOAT_BITWIDTH

    if bitwidth_mode == BitwidthMode.Q8Bit:
        return 8

    node_qcs = n.get_unique_weights_candidates(w_attr)
    w_qcs = [qc.weights_quantization_cfg.get_attr_config(w_attr) for qc in node_qcs]
    # Modes registered in the class mapping reduce the bit-widths of all unique candidates to one value.
    if bitwidth_mode in cls._bitwidth_mode_fn:
        return cls._bitwidth_mode_fn[bitwidth_mode]([qc.weights_n_bits for qc in w_qcs])

    if bitwidth_mode in [BitwidthMode.QCustom, BitwidthMode.QDefaultSP]:
        # if configuration was not passed and the weight has only one candidate, use it
        if len(w_qcs) != 1:  # pragma: no cover
            raise ValueError(f'Could not retrieve the quantization candidate for attr {w_attr} of node {n.name} '
                             f'as it {len(w_qcs)}!=1 unique candidates.')
        return w_qcs[0].weights_n_bits

    raise ValueError(f'Unknown mode {bitwidth_mode.name}')  # pragma: no cover
|