mct-nightly 1.11.0.20240130.post401__py3-none-any.whl → 1.11.0.20240201.post434__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/METADATA +1 -1
  2. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/RECORD +44 -44
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +1 -0
  5. model_compression_toolkit/core/__init__.py +0 -1
  6. model_compression_toolkit/core/common/framework_info.py +1 -2
  7. model_compression_toolkit/core/common/graph/base_graph.py +1 -1
  8. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +4 -2
  9. model_compression_toolkit/core/common/quantization/node_quantization_config.py +11 -6
  10. model_compression_toolkit/core/common/quantization/quantization_config.py +5 -13
  11. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +1 -1
  12. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +40 -17
  13. model_compression_toolkit/core/common/substitutions/scale_equalization.py +1 -1
  14. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  15. model_compression_toolkit/core/pytorch/default_framework_info.py +2 -2
  16. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +14 -7
  17. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +4 -1
  18. model_compression_toolkit/gptq/common/gptq_config.py +1 -3
  19. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  20. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  21. model_compression_toolkit/target_platform_capabilities/constants.py +18 -1
  22. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +1 -1
  23. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +175 -33
  24. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +7 -3
  25. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +11 -1
  26. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +64 -18
  27. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +24 -8
  28. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +15 -2
  29. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +64 -20
  30. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +24 -7
  31. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +15 -2
  32. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +60 -18
  33. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +24 -7
  34. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +15 -2
  35. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +52 -11
  36. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +25 -8
  37. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +15 -2
  38. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +52 -13
  39. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +12 -1
  40. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +12 -1
  41. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/LICENSE.md +0 -0
  42. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/WHEEL +0 -0
  43. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/top_level.txt +0 -0
  44. /model_compression_toolkit/{core/common/defaultdict.py → defaultdict.py} +0 -0
model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import os
+from pathlib import Path
 import tempfile
 from typing import Callable
 
@@ -56,14 +57,20 @@ class FakelyQuantTFLiteExporter(FakelyQuantKerasExporter):
 
         """
         # Use Keras exporter to quantize model's weights before converting it to TFLite.
-        # Since exporter saves the model, we use a tmp path for saving, and then we delete it automatically.
-        with tempfile.NamedTemporaryFile(suffix=DEFAULT_KERAS_EXPORT_EXTENTION) as tmp_file:
-            FakelyQuantKerasExporter(self.model,
-                                     self.is_layer_exportable_fn,
-                                     tmp_file.name,
-                                     verbose=False).export()
+        # Since exporter saves the model, we use a tmp path for saving, and then we delete it.
+        handle, tmp_file = tempfile.mkstemp(DEFAULT_KERAS_EXPORT_EXTENTION)
+        # Close the handle right away; the file is going to be reopened by the Keras exporter.
+        os.close(handle)
+        try:
+            custom_objects = FakelyQuantKerasExporter(self.model,
+                                                      self.is_layer_exportable_fn,
+                                                      tmp_file,
+                                                      verbose=False).export()
 
-            model = keras_load_quantized_model(tmp_file.name)
+            model = keras_load_quantized_model(tmp_file)
+        # Ensure the artifact is removed even in case of error.
+        finally:
+            Path(tmp_file).unlink(missing_ok=True)
 
         self.exported_model = tf.lite.TFLiteConverter.from_keras_model(model).convert()
         Logger.info(f'Exporting FQ tflite model to: {self.save_model_path}')
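The move from NamedTemporaryFile to mkstemp sidesteps a platform pitfall: a file held open by NamedTemporaryFile cannot be reopened by name on Windows, which is exactly what the Keras saver needs to do. A minimal sketch of the pattern, with hypothetical save_model/load_model callables standing in for the exporter:

    import os
    import tempfile
    from pathlib import Path

    def roundtrip_via_tmp_file(save_model, load_model, suffix='.keras'):
        # Create the file, then close the OS-level handle immediately so the
        # saver can reopen the path by name.
        handle, tmp_path = tempfile.mkstemp(suffix)
        os.close(handle)
        try:
            save_model(tmp_path)         # hypothetical writer; reopens the path itself
            return load_model(tmp_path)  # hypothetical reader
        finally:
            # Remove the artifact even if saving or loading raised.
            Path(tmp_path).unlink(missing_ok=True)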
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py

@@ -26,8 +26,11 @@ from mct_quantizers import pytorch_quantizers
 # ONNX opset version 16 is supported from PyTorch 1.12
 if version.parse(torch.__version__) < version.parse("1.12"):
     OPSET_VERSION = 15
-else:
+elif version.parse("1.12.0") <= version.parse(torch.__version__) < version.parse("1.13.0"):
     OPSET_VERSION = 16
+else:
+    # ONNX opset version 17 is supported from PyTorch 1.13
+    OPSET_VERSION = 17
 
 
 class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
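Since the first branch already rules out versions below 1.12, the new elif's lower bound is redundant but harmless. For reference, the same selection restated as a standalone helper (the function name and sample assertions are illustrative):

    from packaging import version

    def select_opset(torch_version: str) -> int:
        # Opset 16 requires PyTorch >= 1.12; opset 17 requires PyTorch >= 1.13.
        v = version.parse(torch_version)
        if v < version.parse('1.12'):
            return 15
        if v < version.parse('1.13.0'):
            return 16
        return 17

    assert select_opset('1.11.0') == 15
    assert select_opset('1.12.1') == 16
    assert select_opset('2.1.0') == 17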
model_compression_toolkit/gptq/common/gptq_config.py

@@ -14,9 +14,7 @@
 # ==============================================================================
 from enum import Enum
 from typing import Callable, Any, Dict
-from model_compression_toolkit.core.common.defaultdict import DefaultDict
-from model_compression_toolkit.core import common
-from model_compression_toolkit.gptq.common.gptq_constants import QUANT_PARAM_LEARNING_STR, MAX_LSB_STR, REG_DEFAULT
+from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
 
 
 class RoundingType(Enum):
model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py

@@ -24,7 +24,7 @@ from mct_quantizers import QuantizationTarget
 from model_compression_toolkit.gptq.common.gptq_constants import AUXVAR, PTQ_THRESHOLD
 from model_compression_toolkit.gptq.keras.quantizer import quant_utils as qutils
 from model_compression_toolkit.constants import THRESHOLD
-from model_compression_toolkit.core.common.defaultdict import DefaultDict
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
 from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
 from mct_quantizers import mark_quantizer
model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py

@@ -16,7 +16,7 @@ import torch
 import torch.nn as nn
 from typing import Dict
 import numpy as np
-from model_compression_toolkit.core.common.defaultdict import DefaultDict
+from model_compression_toolkit.defaultdict import DefaultDict
 
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
 from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
model_compression_toolkit/target_platform_capabilities/constants.py

@@ -24,4 +24,21 @@ LATEST = 'latest'
 DEFAULT_TP_MODEL = 'default'
 IMX500_TP_MODEL = 'imx500'
 TFLITE_TP_MODEL = 'tflite'
-QNNPACK_TP_MODEL = 'qnnpack'
+QNNPACK_TP_MODEL = 'qnnpack'
+
+# TP Attributes
+KERNEL_ATTR = "kernel_attr"
+BIAS_ATTR = "bias_attr"
+
+# TODO: these are duplicated from the core frameworks' constants files, because the original constants can't be
+# used here due to a circular dependency. It might be best to extract the constants from the core file and put
+# them here (in a separate changeset, because it affects the entire code).
+KERAS_KERNEL = "kernel"
+KERAS_DEPTHWISE_KERNEL = "depthwise_kernel"
+BIAS = "bias"
+PYTORCH_KERNEL = "weight"
+
+# Configuration attributes names
+WEIGHTS_N_BITS = 'weights_n_bits'
+WEIGHTS_QUANTIZATION_METHOD = 'weights_quantization_method'
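These constants are the keys used throughout the rest of this diff: KERNEL_ATTR and BIAS_ATTR key the per-attribute quantization configs in a TP model, WEIGHTS_N_BITS names the parameter edited per attribute, and the framework-specific names (KERAS_KERNEL, PYTORCH_KERNEL, ...) are what the generic keys are later mapped to per layer type. A tiny illustration of the indirection (edit_request is a hypothetical name):

    from model_compression_toolkit.target_platform_capabilities.constants import (
        KERNEL_ATTR, WEIGHTS_N_BITS, KERAS_KERNEL, PYTORCH_KERNEL)

    # 'Quantize the kernel attribute with 4 bits', stated framework-agnostically:
    edit_request = {KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}

    # Per framework, the generic key is later resolved to the real attribute name:
    assert KERAS_KERNEL == 'kernel' and PYTORCH_KERNEL == 'weight'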
model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py

@@ -21,7 +21,7 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.targ
     get_default_quantization_config_options, TargetPlatformModel
 
 from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \
-    QuantizationConfigOptions
+    QuantizationConfigOptions, AttributeQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSet, OperatorSetConcat
 
 from mct_quantizers import QuantizationMethod
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py

@@ -14,58 +14,135 @@
 # ==============================================================================
 
 import copy
-from typing import List
+from typing import List, Dict, Union, Any
 
 from mct_quantizers import QuantizationMethod
+from model_compression_toolkit.logger import Logger
 
 
-class OpQuantizationConfig:
+def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any:
     """
-    OpQuantizationConfig is a class to configure the quantization parameters of an operator.
+    Clones the given object and edits some of its parameters.
+
+    Args:
+        obj: An object to clone.
+        **kwargs: Keyword arguments to edit in the cloned object.
+
+    Returns:
+        Edited copy of the given object.
     """
 
+    obj_copy = copy.deepcopy(obj)
+    for k, v in kwargs.items():
+        assert hasattr(obj_copy, k), f'Editing parameters is possible only for existing parameters in the given ' \
+                                     f'object, but {k} is not a parameter of {obj_copy}.'
+        setattr(obj_copy, k, v)
+    return obj_copy
+
+
+class AttributeQuantizationConfig:
+    """
+    Holds the quantization configuration of a weight attribute of a layer.
+    """
     def __init__(self,
-                 activation_quantization_method: QuantizationMethod,
                  weights_quantization_method: QuantizationMethod,
-                 activation_n_bits: int,
                  weights_n_bits: int,
                  weights_per_channel_threshold: bool,
                  enable_weights_quantization: bool,
+                 lut_values_bitwidth: Union[int, None],  # If None - set 8 in hptq, o.w use it
+                 ):
+        """
+        Initializes an attribute quantization config.
+
+        Args:
+            weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization.
+            weights_n_bits (int): Number of bits to quantize the coefficients.
+            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
+            enable_weights_quantization (bool): Whether to quantize the model weights or not.
+            lut_values_bitwidth (int): Number of bits to use when quantizing in a look-up table.
+        """
+
+        self.weights_quantization_method = weights_quantization_method
+        self.weights_n_bits = weights_n_bits
+        self.weights_per_channel_threshold = weights_per_channel_threshold
+        self.enable_weights_quantization = enable_weights_quantization
+        self.lut_values_bitwidth = lut_values_bitwidth
+
+    def clone_and_edit(self, **kwargs):
+        """
+        Clones the quantization config and edits some of its attributes.
+
+        Args:
+            **kwargs: Keyword arguments to edit in the cloned configuration.
+
+        Returns:
+            Edited quantization configuration.
+        """
+
+        return clone_and_edit_object_params(self, **kwargs)
+
+    def __eq__(self, other):
+        """
+        Checks whether this configuration is equal to another object.
+
+        Args:
+            other: Object to compare.
+
+        Returns:
+            Whether this configuration is equal to another object or not.
+        """
+        if not isinstance(other, AttributeQuantizationConfig):
+            return False
+        return self.weights_quantization_method == other.weights_quantization_method and \
+               self.weights_n_bits == other.weights_n_bits and \
+               self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
+               self.enable_weights_quantization == other.enable_weights_quantization and \
+               self.lut_values_bitwidth == other.lut_values_bitwidth
+
+
+class OpQuantizationConfig:
+    """
+    OpQuantizationConfig is a class to configure the quantization parameters of an operator.
+    """
+
+    def __init__(self,
+                 default_weight_attr_config: AttributeQuantizationConfig,
+                 attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig],
+                 activation_quantization_method: QuantizationMethod,
+                 activation_n_bits: int,
                  enable_activation_quantization: bool,
                  quantization_preserving: bool,
                  fixed_scale: float,
                  fixed_zero_point: int,
-                 weights_multiplier_nbits: int,  # If None - set 8 in hptq, o.w use it
-                 simd_size: int):
+                 simd_size: int
+                 ):
         """
 
         Args:
+            default_weight_attr_config (AttributeQuantizationConfig): A default attribute quantization configuration for the operation.
+            attr_weights_configs_mapping (dict): A mapping between an op attribute name and its quantization configuration.
             activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization.
-            weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization.
             activation_n_bits (int): Number of bits to quantize the activations.
-            weights_n_bits (int): Number of bits to quantize the coefficients.
-            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
-            enable_weights_quantization (bool): Whether to quantize the model weights or not.
             enable_activation_quantization (bool): Whether to quantize the model activations or not.
             quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output.
             fixed_scale (float): Scale to use for an operator quantization parameters.
             fixed_zero_point (int): Zero-point to use for an operator quantization parameters.
-            weights_multiplier_nbits (int): Number of bits to use when quantizing in look-up-table.
-            simd_size (int): An integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
+            simd_size (int): Per-op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
 
         """
 
+        self.default_weight_attr_config = default_weight_attr_config
+        self.attr_weights_configs_mapping = attr_weights_configs_mapping
+
         self.activation_quantization_method = activation_quantization_method
-        self.weights_quantization_method = weights_quantization_method
         self.activation_n_bits = activation_n_bits
-        self.weights_n_bits = weights_n_bits
-        self.weights_per_channel_threshold = weights_per_channel_threshold
-        self.enable_weights_quantization = enable_weights_quantization
         self.enable_activation_quantization = enable_activation_quantization
         self.quantization_preserving = quantization_preserving
         self.fixed_scale = fixed_scale
         self.fixed_zero_point = fixed_zero_point
-        self.eights_lut_values_bitwidth = weights_multiplier_nbits
         self.simd_size = simd_size
 
     def get_info(self):
@@ -76,22 +153,28 @@ class OpQuantizationConfig:
         """
         return self.__dict__
 
-    def clone_and_edit(self, **kwargs):
+    def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs):
         """
         Clone the quantization config and edit some of its attributes.
         Args:
+            attr_to_edit: A mapping between the names of attributes to edit and the parameters that
+                should be edited to a new value.
             **kwargs: Keyword arguments to edit the configuration to clone.
 
         Returns:
             Edited quantization configuration.
         """
 
-        qc = copy.deepcopy(self)
-        for k, v in kwargs.items():
-            assert hasattr(qc,
-                           k), f'Edit attributes is possible only for existing attributes in configuration, ' \
-                               f'but {k} is not an attribute of {qc}'
-            setattr(qc, k, v)
+        qc = clone_and_edit_object_params(self, **kwargs)
+
+        # Optionally, edit specific parameters in the configs of the specified attributes.
+        edited_attrs = copy.deepcopy(qc.attr_weights_configs_mapping)
+        for attr_name, attr_cfg in qc.attr_weights_configs_mapping.items():
+            if attr_name in attr_to_edit:
+                edited_attrs[attr_name] = attr_cfg.clone_and_edit(**attr_to_edit[attr_name])
+
+        qc.attr_weights_configs_mapping = edited_attrs
+
         return qc
 
     def __eq__(self, other):
@@ -105,14 +188,12 @@ class OpQuantizationConfig:
         """
         if not isinstance(other, OpQuantizationConfig):
             return False
-        return self.activation_quantization_method == other.activation_quantization_method and \
-               self.weights_quantization_method == other.weights_quantization_method and \
-               self.activation_n_bits == other.activation_n_bits and \
-               self.weights_n_bits == other.weights_n_bits and \
-               self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
-               self.enable_weights_quantization == other.enable_weights_quantization and \
-               self.enable_activation_quantization == other.enable_activation_quantization and \
-               self.simd_size==other.simd_size
+        return self.default_weight_attr_config == other.default_weight_attr_config and \
+               self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \
+               self.activation_quantization_method == other.activation_quantization_method and \
+               self.activation_n_bits == other.activation_n_bits and \
+               self.enable_activation_quantization == other.enable_activation_quantization and \
+               self.simd_size == other.simd_size
 
 
 class QuantizationConfigOptions(object):
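Taken together, the refactor splits an operator's configuration into per-attribute weight configs plus op-level activation settings. A minimal usage sketch of the new API (the values are illustrative, not a recommended configuration):

    from mct_quantizers import QuantizationMethod
    from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR
    from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import (
        AttributeQuantizationConfig, OpQuantizationConfig)

    kernel_cfg = AttributeQuantizationConfig(
        weights_quantization_method=QuantizationMethod.SYMMETRIC,
        weights_n_bits=8,
        weights_per_channel_threshold=True,
        enable_weights_quantization=True,
        lut_values_bitwidth=None)

    op_cfg = OpQuantizationConfig(
        default_weight_attr_config=kernel_cfg.clone_and_edit(enable_weights_quantization=False),
        attr_weights_configs_mapping={KERNEL_ATTR: kernel_cfg},
        activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
        activation_n_bits=8,
        enable_activation_quantization=True,
        quantization_preserving=False,
        fixed_scale=None,
        fixed_zero_point=None,
        simd_size=32)

    # Clone the op config, changing only the kernel's bit-width:
    four_bit_op = op_cfg.clone_and_edit(attr_to_edit={KERNEL_ATTR: {'weights_n_bits': 4}})
    assert four_bit_op.attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits == 4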
@@ -177,6 +258,67 @@ class QuantizationConfigOptions(object):
             self.__edit_quantization_configuration(qc, kwargs)
         return qc_options
 
+    def clone_and_edit_weight_attribute(self, attrs: List[str] = None, **kwargs):
+        """
+        Clones the quantization configurations and edits some of their attributes' parameters.
+
+        Args:
+            attrs: Names of the attributes whose configurations should be edited. If None is provided,
+                the configurations of all attributes in the operation's attributes config mapping are updated.
+            **kwargs: Keyword arguments to edit in the attributes configuration.
+
+        Returns:
+            QuantizationConfigOptions with edited attributes configurations.
+
+        """
+
+        qc_options = copy.deepcopy(self)
+
+        for qc in qc_options.quantization_config_list:
+            if attrs is None:
+                attrs_to_update = list(qc.attr_weights_configs_mapping.keys())
+            else:
+                if not isinstance(attrs, List):
+                    Logger.error(f"Expecting a list of attributes but got {type(attrs)}.")
+                attrs_to_update = attrs
+
+            for attr in attrs_to_update:
+                if qc.attr_weights_configs_mapping.get(attr) is None:
+                    Logger.error(f'Editing attributes is possible only for existing attributes '
+                                 f'in the configuration weights config mapping, but {attr} is not an attribute of {qc}.')
+                self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs)
+        return qc_options
+
+    def clone_and_map_weights_attr_keys(self, layer_attrs_mapping: Union[Dict[str, str], None]):
+        """
+        Clones the quantization configuration options and edits the keys in each configuration's attributes
+        config mapping, based on the given attribute names mapping.
+
+        Args:
+            layer_attrs_mapping: A mapping between attribute names.
+
+        Returns:
+            QuantizationConfigOptions with edited attribute names.
+
+        """
+        qc_options = copy.deepcopy(self)
+
+        for qc in qc_options.quantization_config_list:
+            if layer_attrs_mapping is None:
+                qc.attr_weights_configs_mapping = {}
+            else:
+                new_attr_mapping = {}
+                for attr in list(qc.attr_weights_configs_mapping.keys()):
+                    new_key = layer_attrs_mapping.get(attr)
+                    if new_key is None:
+                        Logger.error(f"Attribute {attr} does not exist in the given attribute mapping.")
+
+                    new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr)
+
+                qc.attr_weights_configs_mapping.update(new_attr_mapping)
+
+        return qc_options
+
     def __edit_quantization_configuration(self, qc, kwargs):
         for k, v in kwargs.items():
             assert hasattr(qc,
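A sketch of how the two new helpers compose, reusing op_cfg from the sketch above: clone_and_edit_weight_attribute edits a parameter across attribute configs, while clone_and_map_weights_attr_keys renames the generic keys to a layer's framework-specific names (here the Keras names added to constants.py in this diff):

    from model_compression_toolkit.target_platform_capabilities.constants import (
        KERNEL_ATTR, BIAS_ATTR, KERAS_KERNEL, BIAS)
    from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import (
        QuantizationConfigOptions)

    qc_options = QuantizationConfigOptions([op_cfg])

    # Disable weight quantization for every attribute of every config in the options:
    no_weight_quant = qc_options.clone_and_edit_weight_attribute(enable_weights_quantization=False)

    # Rename the generic keys to the framework-specific names of, e.g., a Keras Dense layer:
    keras_opts = qc_options.clone_and_map_weights_attr_keys({KERNEL_ATTR: KERAS_KERNEL, BIAS_ATTR: BIAS})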
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import List, Any
+from typing import List, Any, Dict
 
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc
@@ -22,21 +22,25 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.oper
     OperatorsSetBase
 
 
-
 class OperationsSetToLayers(TargetPlatformCapabilitiesComponent):
     """
     Associate an OperatorsSet to a list of framework's layers.
     """
     def __init__(self,
                  op_set_name: str,
-                 layers: List[Any]):
+                 layers: List[Any],
+                 attr_mapping: Dict[str, Any] = None):
         """
 
         Args:
             op_set_name (str): Name of OperatorsSet to associate with layers.
             layers (List[Any]): List of layers/FilterLayerParams to associate with OperatorsSet.
+            attr_mapping (dict): A mapping between a general attribute name and a DefaultDict that maps a layer
+                type to the framework-specific name of that attribute (the dictionary type is left unspecified to
+                avoid a circular dependency).
         """
         self.layers = layers
+        self.attr_mapping = attr_mapping
         super(OperationsSetToLayers, self).__init__(name=op_set_name)
         _current_tpc.get().remove_opset_from_not_used_list(op_set_name)
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py

@@ -188,6 +188,16 @@ class TargetPlatformCapabilities(ImmutableClass):
             qco = self.tp_model.get_config_options_by_operators_set(op2layers.name)
             if qco is None:
                 qco = self.tp_model.default_qco
+
+            # Here we need to map a general attribute name to a framework- and layer-type-specific
+            # attribute name.
+            # attr_mapping maps an attribute's generic name to a dictionary that maps each
+            # layer type to its framework-specific attribute name.
+            # In the loop below, v is that inner dictionary.
+            layer_attrs_mapping = None if op2layers.attr_mapping is None else \
+                {k: v.get(l) for k, v in op2layers.attr_mapping.items()}
+            qco = qco.clone_and_map_weights_attr_keys(layer_attrs_mapping)
+
             if isinstance(l, LayerFilterParams):
                 filterlayer2qco.update({l: qco})
             else:
@@ -231,4 +241,4 @@ class TargetPlatformCapabilities(ImmutableClass):
         Returns: Check if the TP model defines that padding due to SIMD constrains occurs.
 
         """
-        return self.tp_model.is_simd_padding
+        return self.tp_model.is_simd_padding
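The dictionary comprehension above is what connects OperationsSetToLayers.attr_mapping to clone_and_map_weights_attr_keys. A toy illustration of the resolution step, with plain dicts and string layer names standing in for MCT's DefaultDict and real layer classes:

    # attr_mapping: generic attribute name -> (layer type -> framework attribute name).
    attr_mapping = {
        'kernel_attr': {'DepthwiseConv2D': 'depthwise_kernel', 'Conv2D': 'kernel'},
        'bias_attr': {'DepthwiseConv2D': 'bias', 'Conv2D': 'bias'},
    }

    l = 'Conv2D'  # the layer type currently being attached to the operators set
    layer_attrs_mapping = {k: v.get(l) for k, v in attr_mapping.items()}
    assert layer_attrs_mapping == {'kernel_attr': 'kernel', 'bias_attr': 'bias'}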
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py

@@ -15,8 +15,12 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
 from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
     TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
 
 tp = mct.target_platform
@@ -32,14 +36,14 @@ def get_tp_model() -> TargetPlatformModel:
     Returns: A TargetPlatformModel object.
 
     """
-    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
-    return generate_tp_model(default_config=base_config,
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
                              base_config=base_config,
                              mixed_precision_cfg_list=mixed_precision_cfg_list,
                              name='imx500_tp_model')
 
 
-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -48,21 +52,63 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
 
     """
+
+    # TODO: currently, we don't want to quantize any attribute but the kernel by default,
+    # to preserve the current behavior of MCT, so quantization is disabled for all other attributes.
+    # Other quantization parameters are set to what we eventually want to quantize by default
+    # when we enable multi-attribute quantization - THIS NEEDS TO BE MODIFIED IN ALL TP MODELS!
+
+    # Define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,  # TODO: this will be changed to True once multi-attribute quantization is implemented
+        lut_values_bitwidth=None)
+
+    # Define a quantization config to quantize the kernel (for layers that have a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # Define a quantization config to quantize the bias (for layers that have a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
     # Create a quantization config.
     # A quantization configuration defines how an operator
     # should be quantized on the modeled hardware:
-    eight_bits = tp.OpQuantizationConfig(
+
+    # We define a default config for operations without a kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # We define an 8-bit config for quantizing linear operations, which include kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
-        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
         activation_n_bits=8,
-        weights_n_bits=8,
-        weights_per_channel_threshold=True,
-        enable_weights_quantization=True,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
         fixed_zero_point=None,
-        weights_multiplier_nbits=None,
         simd_size=32)
 
     # To quantize a model using mixed-precision, create
@@ -70,14 +116,14 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     # In this example, we quantize some operations' weights
     # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
     # to quantize the operations' activations using LUT.
-    four_bits = eight_bits.clone_and_edit(weights_n_bits=4,
-                                          simd_size=eight_bits.simd_size*2)
-    two_bits = eight_bits.clone_and_edit(weights_n_bits=2,
-                                         simd_size=eight_bits.simd_size*4)
+    four_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}},
+                                                 simd_size=linear_eight_bits.simd_size * 2)
+    two_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}},
+                                                simd_size=linear_eight_bits.simd_size * 4)
 
-    mixed_precision_cfg_list = [eight_bits, four_bits, two_bits]
+    mixed_precision_cfg_list = [linear_eight_bits, four_bits, two_bits]
 
-    return eight_bits, mixed_precision_cfg_list
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
 
 
 def generate_tp_model(default_config: OpQuantizationConfig,
@@ -121,10 +167,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     generated_tpc.set_simd_padding(is_simd_padding=True)
 
     # May suit for operations like: Dropout, Reshape, etc.
+    default_qco = tp.get_default_quantization_config_options()
     tp.OperatorsSet("NoQuantization",
-                    tp.get_default_quantization_config_options().clone_and_edit(
-                        enable_weights_quantization=False,
-                        enable_activation_quantization=False))
+                    default_qco.clone_and_edit(enable_activation_quantization=False)
+                    .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
     mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py

@@ -14,7 +14,11 @@
 # ==============================================================================
 import tensorflow as tf
 from packaging import version
+
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.constants import FOUND_SONY_CUSTOM_LAYERS
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
+    KERAS_KERNEL, BIAS_ATTR, BIAS
 
 if FOUND_SONY_CUSTOM_LAYERS:
     from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess
@@ -85,14 +89,26 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
 
     with keras_tpc:
         tp.OperationsSetToLayers("NoQuantization", no_quant_list)
-
-        tp.OperationsSetToLayers("Conv", [Conv2D,
-                                          DepthwiseConv2D,
-                                          Conv2DTranspose,
-                                          tf.nn.conv2d,
-                                          tf.nn.depthwise_conv2d,
-                                          tf.nn.conv2d_transpose])
-        tp.OperationsSetToLayers("FullyConnected", [Dense])
+        tp.OperationsSetToLayers("Conv",
+                                 [Conv2D,
+                                  DepthwiseConv2D,
+                                  Conv2DTranspose,
+                                  tf.nn.conv2d,
+                                  tf.nn.depthwise_conv2d,
+                                  tf.nn.conv2d_transpose],
+                                 # We provide an attributes mapping that maps each layer type in the operations set
+                                 # that has weights attributes with a provided quantization config (in the TP model)
+                                 # to its framework-specific attribute name.
+                                 # Note that a DefaultDict should be provided if not all the layer types in the
+                                 # operations set are listed separately in the mapping.
+                                 attr_mapping={
+                                     KERNEL_ATTR: DefaultDict({
+                                         DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL,
+                                         tf.nn.depthwise_conv2d: KERAS_DEPTHWISE_KERNEL}, default_value=KERAS_KERNEL),
+                                     BIAS_ATTR: DefaultDict(default_value=BIAS)})
+        tp.OperationsSetToLayers("FullyConnected", [Dense],
+                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
+                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
         tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu,
                                              tf.nn.relu6,
                                              tf.nn.leaky_relu,