mct-nightly 1.11.0.20240131.post412__py3-none-any.whl → 1.11.0.20240201.post434__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.11.0.20240131.post412.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/METADATA +1 -1
- {mct_nightly-1.11.0.20240131.post412.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/RECORD +33 -33
- model_compression_toolkit/constants.py +1 -0
- model_compression_toolkit/core/common/graph/base_graph.py +1 -1
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +4 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +11 -6
- model_compression_toolkit/core/common/quantization/quantization_config.py +5 -13
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +40 -17
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +14 -7
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +4 -1
- model_compression_toolkit/target_platform_capabilities/constants.py +18 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +1 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +175 -33
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +7 -3
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +11 -1
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +64 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +24 -8
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +15 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +64 -20
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +24 -7
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +15 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +60 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +24 -7
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +15 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +52 -11
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +25 -8
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +15 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +52 -13
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +12 -1
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +12 -1
- {mct_nightly-1.11.0.20240131.post412.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.11.0.20240131.post412.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/WHEEL +0 -0
- {mct_nightly-1.11.0.20240131.post412.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/top_level.txt +0 -0
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py
CHANGED
@@ -14,58 +14,135 @@
 # ==============================================================================
 
 import copy
-from typing import List
+from typing import List, Dict, Union, Any
 
 from mct_quantizers import QuantizationMethod
+from model_compression_toolkit.logger import Logger
 
 
-
+def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any:
     """
-
+    Clones the given object and edit some of its parameters.
+
+    Args:
+        obj: An object to clone.
+        **kwargs: Keyword arguments to edit in the cloned object.
+
+    Returns:
+        Edited copy of the given object.
     """
 
+    obj_copy = copy.deepcopy(obj)
+    for k, v in kwargs.items():
+        assert hasattr(obj_copy,
+                       k), f'Edit parameter is possible only for existing parameters in the given object, ' \
+                           f'but {k} is not a parameter of {obj_copy}.'
+        setattr(obj_copy, k, v)
+    return obj_copy
+
+
+class AttributeQuantizationConfig:
+    """
+    Hold the quantization configuration of a weight attribute of a layer.
+    """
     def __init__(self,
-                 activation_quantization_method: QuantizationMethod,
                  weights_quantization_method: QuantizationMethod,
-                 activation_n_bits: int,
                  weights_n_bits: int,
                  weights_per_channel_threshold: bool,
                  enable_weights_quantization: bool,
+                 lut_values_bitwidth: Union[int, None],  # If None - set 8 in hptq, o.w use it
+                 ):
+        """
+        Initializes an attribute quantization config.
+
+        Args:
+            weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization.
+            weights_n_bits (int): Number of bits to quantize the coefficients.
+            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
+            enable_weights_quantization (bool): Whether to quantize the model weights or not.
+            lut_values_bitwidth (int): Number of bits to use when quantizing in look-up-table.
+
+        """
+
+        self.weights_quantization_method = weights_quantization_method
+        self.weights_n_bits = weights_n_bits
+        self.weights_per_channel_threshold = weights_per_channel_threshold
+        self.enable_weights_quantization = enable_weights_quantization
+        self.lut_values_bitwidth = lut_values_bitwidth
+
+    def clone_and_edit(self, **kwargs):
+        """
+        Clone the quantization config and edit some of its attributes.
+
+        Args:
+            **kwargs: Keyword arguments to edit the configuration to clone.
+
+        Returns:
+            Edited quantization configuration.
+        """
+
+        return clone_and_edit_object_params(self, **kwargs)
+
+    def __eq__(self, other):
+        """
+        Is this configuration equal to another object.
+
+        Args:
+            other: Object to compare.
+
+        Returns:
+
+            Whether this configuration is equal to another object or not.
+        """
+        if not isinstance(other, AttributeQuantizationConfig):
+            return False
+        return self.weights_quantization_method == other.weights_quantization_method and \
+               self.weights_n_bits == other.weights_n_bits and \
+               self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
+               self.enable_weights_quantization == other.enable_weights_quantization and \
+               self.lut_values_bitwidth == other.lut_values_bitwidth
+
+
+class OpQuantizationConfig:
+    """
+    OpQuantizationConfig is a class to configure the quantization parameters of an operator.
+    """
+
+    def __init__(self,
+                 default_weight_attr_config: AttributeQuantizationConfig,
+                 attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig],
+                 activation_quantization_method: QuantizationMethod,
+                 activation_n_bits: int,
                  enable_activation_quantization: bool,
                  quantization_preserving: bool,
                  fixed_scale: float,
                  fixed_zero_point: int,
-
-
+                 simd_size: int
+                 ):
         """
 
         Args:
+            default_weight_attr_config (AttributeQuantizationConfig): A default attribute quantization configuration for the operation.
+            attr_weights_configs_mapping (dict): A mapping between an op attribute name and its quantization configuration.
             activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization.
-            weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization.
             activation_n_bits (int): Number of bits to quantize the activations.
-            weights_n_bits (int): Number of bits to quantize the coefficients.
-            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
-            enable_weights_quantization (bool): Whether to quantize the model weights or not.
             enable_activation_quantization (bool): Whether to quantize the model activations or not.
             quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output.
             fixed_scale (float): Scale to use for an operator quantization parameters.
             fixed_zero_point (int): Zero-point to use for an operator quantization parameters.
-
-            simd_size (int): An integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
+            simd_size (int): Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
 
         """
 
+        self.default_weight_attr_config = default_weight_attr_config
+        self.attr_weights_configs_mapping = attr_weights_configs_mapping
+
         self.activation_quantization_method = activation_quantization_method
-        self.weights_quantization_method = weights_quantization_method
         self.activation_n_bits = activation_n_bits
-        self.weights_n_bits = weights_n_bits
-        self.weights_per_channel_threshold = weights_per_channel_threshold
-        self.enable_weights_quantization = enable_weights_quantization
         self.enable_activation_quantization = enable_activation_quantization
         self.quantization_preserving = quantization_preserving
         self.fixed_scale = fixed_scale
         self.fixed_zero_point = fixed_zero_point
-        self.eights_lut_values_bitwidth = weights_multiplier_nbits
         self.simd_size = simd_size
 
     def get_info(self):
@@ -76,22 +153,28 @@ class OpQuantizationConfig:
         """
         return self.__dict__
 
-    def clone_and_edit(self, **kwargs):
+    def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs):
         """
         Clone the quantization config and edit some of its attributes.
         Args:
+            attr_to_edit: A mapping between attributes names to edit and their parameters that
+                          should be edited to a new value.
             **kwargs: Keyword arguments to edit the configuration to clone.
 
         Returns:
             Edited quantization configuration.
         """
 
-        qc =
-
-
-
-
-
+        qc = clone_and_edit_object_params(self, **kwargs)
+
+        # optionally: editing specific parameters in the config of specified attributes
+        edited_attrs = copy.deepcopy(qc.attr_weights_configs_mapping)
+        for attr_name, attr_cfg in qc.attr_weights_configs_mapping.items():
+            if attr_name in attr_to_edit:
+                edited_attrs[attr_name] = attr_cfg.clone_and_edit(**attr_to_edit[attr_name])
+
+        qc.attr_weights_configs_mapping = edited_attrs
+
         return qc
 
     def __eq__(self, other):
@@ -105,14 +188,12 @@ class OpQuantizationConfig:
         """
         if not isinstance(other, OpQuantizationConfig):
             return False
-        return self.
-
-
-
-
-
-               self.enable_activation_quantization == other.enable_activation_quantization and \
-               self.simd_size==other.simd_size
+        return self.default_weight_attr_config == other.default_weight_attr_config and \
+               self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \
+               self.activation_quantization_method == other.activation_quantization_method and \
+               self.activation_n_bits == other.activation_n_bits and \
+               self.enable_activation_quantization == other.enable_activation_quantization and \
+               self.simd_size == other.simd_size
 
 
 class QuantizationConfigOptions(object):
@@ -177,6 +258,67 @@ class QuantizationConfigOptions(object):
             self.__edit_quantization_configuration(qc, kwargs)
         return qc_options
 
+    def clone_and_edit_weight_attribute(self, attrs: List[str] = None, **kwargs):
+        """
+        Clones the quantization configurations and edits some of their attributes' parameters.
+
+        Args:
+            attrs: attributes names to clone their configurations. If None is provided, updating the configurations
+                   of all attributes in the operation attributes config mapping.
+            **kwargs: Keyword arguments to edit in the attributes configuration.
+
+        Returns:
+            QuantizationConfigOptions with edited attributes configurations.
+
+        """
+
+        qc_options = copy.deepcopy(self)
+
+        for qc in qc_options.quantization_config_list:
+            if attrs is None:
+                attrs_to_update = list(qc.attr_weights_configs_mapping.keys())
+            else:
+                if not isinstance(attrs, List):
+                    Logger.error(f"Expecting a list of attribute but got {type(attrs)}.")
+                attrs_to_update = attrs
+
+            for attr in attrs_to_update:
+                if qc.attr_weights_configs_mapping.get(attr) is None:
+                    Logger.error(f'Edit attributes is possible only for existing attributes '
+                                 f'in the configuration weights config mapping, but {attr} is not an attribute of {qc}.')
+                self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs)
+        return qc_options
+
+    def clone_and_map_weights_attr_keys(self, layer_attrs_mapping: Union[Dict[str, str], None]):
+        """
+        Clones the quantization configuration options and edits the keys in each configuration attributes config mapping,
+        based on the given attributes names mapping.
+
+        Args:
+            layer_attrs_mapping: A mapping between attributes names.
+
+        Returns:
+            QuantizationConfigOptions with edited attributes names.
+
+        """
+        qc_options = copy.deepcopy(self)
+
+        for qc in qc_options.quantization_config_list:
+            if layer_attrs_mapping is None:
+                qc.attr_weights_configs_mapping = {}
+            else:
+                new_attr_mapping = {}
+                for attr in list(qc.attr_weights_configs_mapping.keys()):
+                    new_key = layer_attrs_mapping.get(attr)
+                    if new_key is None:
+                        Logger.error(f"Attribute {attr} does not exist in the given attribute mapping.")
+
+                    new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr)
+
+                qc.attr_weights_configs_mapping.update(new_attr_mapping)
+
+        return qc_options
+
     def __edit_quantization_configuration(self, qc, kwargs):
         for k, v in kwargs.items():
             assert hasattr(qc,
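
Taken together, the new pieces compose as follows. A minimal usage sketch (not part of the package diff), assuming only the code above; the attribute key 'kernel' and the variable names are illustrative:

    from mct_quantizers import QuantizationMethod
    from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
        AttributeQuantizationConfig, OpQuantizationConfig

    # Per-attribute config: 8-bit symmetric, per-channel kernel quantization.
    kernel_cfg = AttributeQuantizationConfig(
        weights_quantization_method=QuantizationMethod.SYMMETRIC,
        weights_n_bits=8,
        weights_per_channel_threshold=True,
        enable_weights_quantization=True,
        lut_values_bitwidth=None)

    # Operator config that routes the 'kernel' attribute to kernel_cfg and keeps
    # every other weight attribute on a non-quantized default.
    op_cfg = OpQuantizationConfig(
        default_weight_attr_config=kernel_cfg.clone_and_edit(enable_weights_quantization=False),
        attr_weights_configs_mapping={'kernel': kernel_cfg},
        activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
        activation_n_bits=8,
        enable_activation_quantization=True,
        quantization_preserving=False,
        fixed_scale=None,
        fixed_zero_point=None,
        simd_size=32)

    # attr_to_edit edits only the named attribute's config; **kwargs edit op-level fields.
    four_bit = op_cfg.clone_and_edit(attr_to_edit={'kernel': {'weights_n_bits': 4}}, simd_size=64)
    assert four_bit.attr_weights_configs_mapping['kernel'].weights_n_bits == 4
    assert op_cfg.attr_weights_configs_mapping['kernel'].weights_n_bits == 8  # clone; original untouched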

model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py
CHANGED

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import List, Any
+from typing import List, Any, Dict
 
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc

@@ -22,21 +22,25 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.oper
     OperatorsSetBase
 
 
-
 class OperationsSetToLayers(TargetPlatformCapabilitiesComponent):
     """
     Associate an OperatorsSet to a list of framework's layers.
     """
     def __init__(self,
                  op_set_name: str,
-                 layers: List[Any]
+                 layers: List[Any],
+                 attr_mapping: Dict[str, Any] = None):
         """
 
         Args:
            op_set_name (str): Name of OperatorsSet to associate with layers.
            layers (List[Any]): List of layers/FilterLayerParams to associate with OperatorsSet.
+           attr_mapping (dict): A mapping between a general attribute name to a DefaultDict that maps a layer
+                                type to the layer's framework name of this attribute (the dictionary type is not specified to
+                                handle circular dependency).
         """
         self.layers = layers
+        self.attr_mapping = attr_mapping
         super(OperationsSetToLayers, self).__init__(name=op_set_name)
         _current_tpc.get().remove_opset_from_not_used_list(op_set_name)
 
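
The new attr_mapping argument is what lets a framework TPC later translate generic attribute names into concrete layer attributes. A minimal sketch (hypothetical, mirroring the Keras/PyTorch TPC diffs further down; `fc_tpc`, `Linear`, and the framework names 'weight'/'bias' are illustrative placeholders):

    from model_compression_toolkit.defaultdict import DefaultDict
    from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR

    with fc_tpc:  # a TargetPlatformCapabilities context, as in the TPC files below
        tp.OperationsSetToLayers("FullyConnected", [Linear],
                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value='weight'),
                                               BIAS_ATTR: DefaultDict(default_value='bias')})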

model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py
CHANGED

@@ -188,6 +188,16 @@ class TargetPlatformCapabilities(ImmutableClass):
             qco = self.tp_model.get_config_options_by_operators_set(op2layers.name)
             if qco is None:
                 qco = self.tp_model.default_qco
+
+            # here, we need to take care of mapping a general attribute name into a framework and
+            # layer type specific attribute name.
+            # attr_mapping is a mapping between an attribute generic name to a dictionary that maps each
+            # layer type to its framework-specific attribute name.
+            # in the loop below, v is the inner dictionary.
+            layer_attrs_mapping = None if op2layers.attr_mapping is None else \
+                {k: v.get(l) for k, v in op2layers.attr_mapping.items()}
+            qco = qco.clone_and_map_weights_attr_keys(layer_attrs_mapping)
+
             if isinstance(l, LayerFilterParams):
                 filterlayer2qco.update({l: qco})
             else:

@@ -231,4 +241,4 @@ class TargetPlatformCapabilities(ImmutableClass):
         Returns: Check if the TP model defines that padding due to SIMD constrains occurs.
 
         """
-        return self.tp_model.is_simd_padding
+        return self.tp_model.is_simd_padding
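
To make the dict comprehension above concrete: for a given layer l, each per-attribute DefaultDict collapses to a single framework name. A self-contained sketch, assuming MCT's DefaultDict returns its default_value for unknown keys (the constants and layer classes below are stand-ins, not the real ones):

    from model_compression_toolkit.defaultdict import DefaultDict

    KERNEL_ATTR, BIAS_ATTR = 'kernel_attr', 'bias_attr'  # stand-in constants
    KERAS_KERNEL, KERAS_DEPTHWISE_KERNEL, BIAS = 'kernel', 'depthwise_kernel', 'bias'

    class Conv2D: pass             # stand-in layer types
    class DepthwiseConv2D: pass

    attr_mapping = {KERNEL_ATTR: DefaultDict({DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL},
                                             default_value=KERAS_KERNEL),
                    BIAS_ATTR: DefaultDict(default_value=BIAS)}

    for l in (Conv2D, DepthwiseConv2D):
        layer_attrs_mapping = {k: v.get(l) for k, v in attr_mapping.items()}
        # Conv2D          -> {'kernel_attr': 'kernel', 'bias_attr': 'bias'}
        # DepthwiseConv2D -> {'kernel_attr': 'depthwise_kernel', 'bias_attr': 'bias'}

clone_and_map_weights_attr_keys then renames the keys of each config's attr_weights_configs_mapping accordingly.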

model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
CHANGED

@@ -15,8 +15,12 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
 from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
     TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
 
 tp = mct.target_platform
 

@@ -32,14 +36,14 @@ def get_tp_model() -> TargetPlatformModel:
     Returns: A TargetPlatformModel object.
 
     """
-    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
-    return generate_tp_model(default_config=
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
                              base_config=base_config,
                              mixed_precision_cfg_list=mixed_precision_cfg_list,
                              name='imx500_tp_model')
 
 
-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as

@@ -48,21 +52,63 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
 
     """
+
+    # TODO: currently, we don't want to quantize any attribute but the kernel by default,
+    # to preserve the current behavior of MCT, so quantization is disabled for all other attributes.
+    # Other quantization parameters are set to what we eventually want to quantize by default
+    # when we enable multi-attributes quantization - THIS NEED TO BE MODIFIED IN ALL TP MODELS!
+
+    # define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,  # TODO: this will changed to True once implementing multi-attributes quantization
+        lut_values_bitwidth=None)
+
+    # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # define a quantization config to quantize the bias (for layers where there is a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
     # Create a quantization config.
     # A quantization configuration defines how an operator
     # should be quantized on the modeled hardware:
-
+
+    # We define a default config for operation without kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
-        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
         activation_n_bits=8,
-        weights_n_bits=8,
-        weights_per_channel_threshold=True,
-        enable_weights_quantization=True,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
         fixed_zero_point=None,
-        weights_multiplier_nbits=None,
         simd_size=32)
 
     # To quantize a model using mixed-precision, create

@@ -70,14 +116,14 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     # In this example, we quantize some operations' weights
     # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
     # to quantize the operations' activations using LUT.
-    four_bits =
-
-    two_bits =
-
+    four_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}},
+                                                 simd_size=linear_eight_bits.simd_size * 2)
+    two_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}},
+                                                simd_size=linear_eight_bits.simd_size * 4)
 
-    mixed_precision_cfg_list = [
+    mixed_precision_cfg_list = [linear_eight_bits, four_bits, two_bits]
 
-    return
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
 
 
 def generate_tp_model(default_config: OpQuantizationConfig,

@@ -121,10 +167,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     generated_tpc.set_simd_padding(is_simd_padding=True)
 
     # May suit for operations like: Dropout, Reshape, etc.
+    default_qco = tp.get_default_quantization_config_options()
     tp.OperatorsSet("NoQuantization",
-
-
-                                      enable_activation_quantization=False))
+                    default_qco.clone_and_edit(enable_activation_quantization=False)
+                    .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
     mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
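
Note the changed contract: get_op_quantization_configs() now returns a triple, so callers that unpacked two values must be updated. A minimal sketch of the new calling convention, mirroring get_tp_model above:

    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import \
        get_op_quantization_configs, generate_tp_model

    # base_config drives mixed precision; default_config is the kernel-less config
    # applied to non-linear operations.
    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
    tp_model = generate_tp_model(default_config=default_config,
                                 base_config=base_config,
                                 mixed_precision_cfg_list=mixed_precision_cfg_list,
                                 name='imx500_tp_model')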
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py
CHANGED
@@ -14,7 +14,11 @@
 # ==============================================================================
 import tensorflow as tf
 from packaging import version
+
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.constants import FOUND_SONY_CUSTOM_LAYERS
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
+    KERAS_KERNEL, BIAS_ATTR, BIAS
 
 if FOUND_SONY_CUSTOM_LAYERS:
     from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess

@@ -85,14 +89,26 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
 
     with keras_tpc:
         tp.OperationsSetToLayers("NoQuantization", no_quant_list)
-
-
-
-
-
-
-
-
+        tp.OperationsSetToLayers("Conv",
+                                 [Conv2D,
+                                  DepthwiseConv2D,
+                                  Conv2DTranspose,
+                                  tf.nn.conv2d,
+                                  tf.nn.depthwise_conv2d,
+                                  tf.nn.conv2d_transpose],
+                                 # we provide attributes mapping that maps each layer type in the operations set
+                                 # that has weights attributes with provided quantization config (in the tp model) to
+                                 # its framework-specific attribute name.
+                                 # note that a DefaultDict should be provided if not all the layer types in the
+                                 # operation set are provided separately in the mapping.
+                                 attr_mapping={
+                                     KERNEL_ATTR: DefaultDict({
+                                         DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL,
+                                         tf.nn.depthwise_conv2d: KERAS_DEPTHWISE_KERNEL}, default_value=KERAS_KERNEL),
+                                     BIAS_ATTR: DefaultDict(default_value=BIAS)})
+        tp.OperationsSetToLayers("FullyConnected", [Dense],
+                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
+                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
         tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu,
                                              tf.nn.relu6,
                                              tf.nn.leaky_relu,
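
A short usage sketch tying the two files together ('imx500_keras_tpc' is an illustrative name; get_tp_model and generate_keras_tpc are the functions shown in these diffs):

    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model

    # Build the Keras TPC from the v1 IMX500 TP model; the attr_mapping entries above
    # are what resolve KERNEL_ATTR/BIAS_ATTR to each Keras layer's real attribute name.
    keras_tpc = generate_keras_tpc(name='imx500_keras_tpc', tp_model=get_tp_model())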
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py
CHANGED
@@ -23,6 +23,9 @@ from torch.nn import Dropout, Flatten, Hardtanh
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
+    BIAS
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model
 import model_compression_toolkit as mct
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1 import __version__ as TPC_VERSION

@@ -52,6 +55,14 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                          name=name,
                                          version=TPC_VERSION)
 
+    # we provide attributes mapping that maps each layer type in the operations set
+    # that has weights attributes with provided quantization config (in the tp model) to
+    # its framework-specific attribute name.
+    # note that a DefaultDict should be provided if not all the layer types in the
+    # operation set are provided separately in the mapping.
+    pytorch_linear_attr_mapping = {KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
+                                   BIAS_ATTR: DefaultDict(default_value=BIAS)}
+
     with pytorch_tpc:
         tp.OperationsSetToLayers("NoQuantization", [Dropout,
                                                     Flatten,

@@ -72,8 +83,10 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                     gather,
                                                     topk])
 
-        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d]
-
+        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d],
+                                 attr_mapping=pytorch_linear_attr_mapping)
+        tp.OperationsSetToLayers("FullyConnected", [Linear],
+                                 attr_mapping=pytorch_linear_attr_mapping)
         tp.OperationsSetToLayers("AnyReLU", [torch.relu,
                                              ReLU,
                                              ReLU6,