mct-nightly 1.11.0.20240130.post401__py3-none-any.whl → 1.11.0.20240201.post434__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/METADATA +1 -1
  2. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/RECORD +44 -44
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +1 -0
  5. model_compression_toolkit/core/__init__.py +0 -1
  6. model_compression_toolkit/core/common/framework_info.py +1 -2
  7. model_compression_toolkit/core/common/graph/base_graph.py +1 -1
  8. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +4 -2
  9. model_compression_toolkit/core/common/quantization/node_quantization_config.py +11 -6
  10. model_compression_toolkit/core/common/quantization/quantization_config.py +5 -13
  11. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +1 -1
  12. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +40 -17
  13. model_compression_toolkit/core/common/substitutions/scale_equalization.py +1 -1
  14. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  15. model_compression_toolkit/core/pytorch/default_framework_info.py +2 -2
  16. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +14 -7
  17. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +4 -1
  18. model_compression_toolkit/gptq/common/gptq_config.py +1 -3
  19. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  20. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  21. model_compression_toolkit/target_platform_capabilities/constants.py +18 -1
  22. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +1 -1
  23. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +175 -33
  24. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +7 -3
  25. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +11 -1
  26. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +64 -18
  27. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +24 -8
  28. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +15 -2
  29. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +64 -20
  30. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +24 -7
  31. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +15 -2
  32. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +60 -18
  33. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +24 -7
  34. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +15 -2
  35. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +52 -11
  36. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +25 -8
  37. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +15 -2
  38. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +52 -13
  39. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +12 -1
  40. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +12 -1
  41. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/LICENSE.md +0 -0
  42. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/WHEEL +0 -0
  43. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/top_level.txt +0 -0
  44. /model_compression_toolkit/{core/common/defaultdict.py → defaultdict.py} +0 -0
@@ -23,6 +23,9 @@ from torch.nn import Dropout, Flatten, Hardtanh
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
+    BIAS
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model
 import model_compression_toolkit as mct
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1 import __version__ as TPC_VERSION
@@ -52,6 +55,14 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                 name=name,
                                                 version=TPC_VERSION)
 
+    # we provide attributes mapping that maps each layer type in the operations set
+    # that has weights attributes with provided quantization config (in the tp model) to
+    # its framework-specific attribute name.
+    # note that a DefaultDict should be provided if not all the layer types in the
+    # operation set are provided separately in the mapping.
+    pytorch_linear_attr_mapping = {KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
+                                   BIAS_ATTR: DefaultDict(default_value=BIAS)}
+
     with pytorch_tpc:
         tp.OperationsSetToLayers("NoQuantization", [Dropout,
                                                     Flatten,
@@ -72,8 +83,10 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                     gather,
                                                     topk])
 
-        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d])
-        tp.OperationsSetToLayers("FullyConnected", [Linear])
+        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d],
+                                 attr_mapping=pytorch_linear_attr_mapping)
+        tp.OperationsSetToLayers("FullyConnected", [Linear],
+                                 attr_mapping=pytorch_linear_attr_mapping)
        tp.OperationsSetToLayers("AnyReLU", [torch.relu,
                                             ReLU,
                                             ReLU6,
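The attr_mapping comments above describe a per-layer lookup of framework-specific attribute names. A minimal sketch of that lookup behavior, assuming MCT's DefaultDict returns the explicitly mapped value for a known key and falls back to default_value otherwise (the class below is a stand-in for illustration, not MCT's own implementation):

    from typing import Any, Dict, Optional

    class DefaultDictSketch:
        """Stand-in for model_compression_toolkit.defaultdict.DefaultDict."""

        def __init__(self, known_dict: Optional[Dict] = None, default_value: Any = None):
            self.known_dict = known_dict or {}
            self.default_value = default_value

        def get(self, key: Any) -> Any:
            # Explicitly mapped layer types win; anything else gets the default name.
            return self.known_dict.get(key, self.default_value)

    # Conv2d and ConvTranspose2d both store their kernel under 'weight' in PyTorch,
    # so a bare default suffices ('weight'/'bias' stand in for PYTORCH_KERNEL/BIAS).
    kernel_mapping = DefaultDictSketch(default_value='weight')
    bias_mapping = DefaultDictSketch(default_value='bias')
    assert kernel_mapping.get('Conv2d') == 'weight'
    assert bias_mapping.get('Linear') == 'bias'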
@@ -15,8 +15,13 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
+    WEIGHTS_QUANTIZATION_METHOD
 from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
     TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
 
 tp = mct.target_platform
 
@@ -32,14 +37,14 @@ def get_tp_model() -> TargetPlatformModel:
     Returns: A TargetPlatformModel object.
 
     """
-    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
-    return generate_tp_model(default_config=base_config,
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
                              base_config=base_config,
                              mixed_precision_cfg_list=mixed_precision_cfg_list,
                              name='imx500_lut_tp_model')
 
 
-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -48,21 +53,58 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
 
     """
+
+    # We define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
+    # We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # We define a quantization config to quantize the bias (for layers where there is a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
     # Create a quantization config.
     # A quantization configuration defines how an operator
     # should be quantized on the modeled hardware:
-    eight_bits = tp.OpQuantizationConfig(
+
+    # We define a default config for operation without kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
-        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
         activation_n_bits=8,
-        weights_n_bits=8,
-        weights_per_channel_threshold=True,
-        enable_weights_quantization=True,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
         fixed_zero_point=None,
-        weights_multiplier_nbits=None,
         simd_size=32)
 
     # To quantize a model using mixed-precision, create
@@ -70,15 +112,17 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     # In this example, we quantize some operations' weights
     # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
     # to quantize the operations' activations using LUT.
-    four_bits_lut = eight_bits.clone_and_edit(weights_n_bits=4,
-                                              weights_quantization_method=tp.QuantizationMethod.LUT_SYM_QUANTIZER,
-                                              simd_size=eight_bits.simd_size*2)
-    two_bits_lut = eight_bits.clone_and_edit(weights_n_bits=2,
-                                             weights_quantization_method=tp.QuantizationMethod.LUT_SYM_QUANTIZER,
-                                             simd_size=eight_bits.simd_size*4)
-    mixed_precision_cfg_list = [eight_bits, four_bits_lut, two_bits_lut]
+    four_bits_lut = linear_eight_bits.clone_and_edit(
+        attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4,
+                                    WEIGHTS_QUANTIZATION_METHOD: tp.QuantizationMethod.LUT_SYM_QUANTIZER}},
+        simd_size=linear_eight_bits.simd_size * 2)
+    two_bits_lut = linear_eight_bits.clone_and_edit(
+        attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2,
+                                    WEIGHTS_QUANTIZATION_METHOD: tp.QuantizationMethod.LUT_SYM_QUANTIZER}},
+        simd_size=linear_eight_bits.simd_size * 4)
+    mixed_precision_cfg_list = [linear_eight_bits, four_bits_lut, two_bits_lut]
 
-    return eight_bits, mixed_precision_cfg_list
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
 
 
 def generate_tp_model(default_config: OpQuantizationConfig,
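The clone calls above switch from flat keyword edits (weights_n_bits=...) to a nested attr_to_edit dict keyed by attribute name. A toy sketch of those semantics, assuming clone_and_edit deep-copies the config, applies top-level keywords to op-wide fields, and merges attr_to_edit into the per-attribute configs (plain dicts stand in for the real config objects):

    import copy

    def clone_and_edit_sketch(cfg: dict, attr_to_edit: dict = None, **kwargs) -> dict:
        new_cfg = copy.deepcopy(cfg)
        new_cfg.update(kwargs)  # op-wide fields, e.g. simd_size
        for attr_name, edits in (attr_to_edit or {}).items():
            # Merge the nested edits into that attribute's config only.
            new_cfg['attr_weights_configs_mapping'][attr_name].update(edits)
        return new_cfg

    eight = {'simd_size': 32,
             'attr_weights_configs_mapping': {'kernel': {'weights_n_bits': 8}}}
    four = clone_and_edit_sketch(eight,
                                 attr_to_edit={'kernel': {'weights_n_bits': 4}},
                                 simd_size=eight['simd_size'] * 2)
    assert four['attr_weights_configs_mapping']['kernel']['weights_n_bits'] == 4
    assert eight['attr_weights_configs_mapping']['kernel']['weights_n_bits'] == 8  # original untouched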
@@ -120,10 +164,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # Otherwise, it will be a configure-less set (used in fusing):
 
     # May suit for operations like: Dropout, Reshape, etc.
+    default_qco = tp.get_default_quantization_config_options()
     tp.OperatorsSet("NoQuantization",
-                    tp.get_default_quantization_config_options().clone_and_edit(
-                        enable_weights_quantization=False,
-                        enable_activation_quantization=False))
+                    default_qco.clone_and_edit(enable_activation_quantization=False)
+                    .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
     mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
@@ -14,7 +14,11 @@
 # ==============================================================================
 import tensorflow as tf
 from packaging import version
+
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.constants import FOUND_SONY_CUSTOM_LAYERS
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
+    KERAS_DEPTHWISE_KERNEL, BIAS
 
 if FOUND_SONY_CUSTOM_LAYERS:
     from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess
@@ -86,13 +90,26 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
     with keras_tpc:
         tp.OperationsSetToLayers("NoQuantization", no_quant_list)
 
-        tp.OperationsSetToLayers("Conv", [Conv2D,
-                                          DepthwiseConv2D,
-                                          Conv2DTranspose,
-                                          tf.nn.conv2d,
-                                          tf.nn.depthwise_conv2d,
-                                          tf.nn.conv2d_transpose])
-        tp.OperationsSetToLayers("FullyConnected", [Dense])
+        tp.OperationsSetToLayers("Conv",
+                                 [Conv2D,
+                                  DepthwiseConv2D,
+                                  Conv2DTranspose,
+                                  tf.nn.conv2d,
+                                  tf.nn.depthwise_conv2d,
+                                  tf.nn.conv2d_transpose],
+                                 # we provide attributes mapping that maps each layer type in the operations set
+                                 # that has weights attributes with provided quantization config (in the tp model) to
+                                 # its framework-specific attribute name.
+                                 # note that a DefaultDict should be provided if not all the layer types in the
+                                 # operation set are provided separately in the mapping.
+                                 attr_mapping={
+                                     KERNEL_ATTR: DefaultDict({
+                                         DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL,
+                                         tf.nn.depthwise_conv2d: KERAS_DEPTHWISE_KERNEL}, default_value=KERAS_KERNEL),
+                                     BIAS_ATTR: DefaultDict(default_value=BIAS)})
+        tp.OperationsSetToLayers("FullyConnected", [Dense],
+                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
+                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
         tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu,
                                              tf.nn.relu6,
                                              tf.nn.leaky_relu,
@@ -23,6 +23,9 @@ from torch.nn import Dropout, Flatten, Hardtanh
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
+    BIAS
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1_lut.tp_model import get_tp_model
 import model_compression_toolkit as mct
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1_lut import __version__ as TPC_VERSION
@@ -52,6 +55,14 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                 name=name,
                                                 version=TPC_VERSION)
 
+    # we provide attributes mapping that maps each layer type in the operations set
+    # that has weights attributes with provided quantization config (in the tp model) to
+    # its framework-specific attribute name.
+    # note that a DefaultDict should be provided if not all the layer types in the
+    # operation set are provided separately in the mapping.
+    pytorch_linear_attr_mapping = {KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
+                                   BIAS_ATTR: DefaultDict(default_value=BIAS)}
+
     with pytorch_tpc:
         tp.OperationsSetToLayers("NoQuantization", [Dropout,
                                                     Flatten,
@@ -72,8 +83,10 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                     gather,
                                                     topk])
 
-        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d])
-        tp.OperationsSetToLayers("FullyConnected", [Linear])
+        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d],
+                                 attr_mapping=pytorch_linear_attr_mapping)
+        tp.OperationsSetToLayers("FullyConnected", [Linear],
+                                 attr_mapping=pytorch_linear_attr_mapping)
        tp.OperationsSetToLayers("AnyReLU", [torch.relu,
                                             ReLU,
                                             ReLU6,
@@ -15,8 +15,13 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
 from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
     TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
+
 
 tp = mct.target_platform
 
@@ -32,14 +37,14 @@ def get_tp_model() -> TargetPlatformModel:
     Returns: A TargetPlatformModel object.
 
     """
-    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
-    return generate_tp_model(default_config=base_config,
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
                              base_config=base_config,
                              mixed_precision_cfg_list=mixed_precision_cfg_list,
                              name='imx500_pot_tp_model')
 
 
-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -48,21 +53,58 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
 
     """
+
+    # We define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
+    # We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # We define a quantization config to quantize the bias (for layers where there is a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
     # Create a quantization config.
     # A quantization configuration defines how an operator
     # should be quantized on the modeled hardware:
-    eight_bits = tp.OpQuantizationConfig(
+
+    # We define a default config for operation without kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
-        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
-        weights_n_bits=8,
-        weights_per_channel_threshold=True,
-        enable_weights_quantization=True,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
         fixed_zero_point=None,
-        weights_multiplier_nbits=None,
+        simd_size=32)
+
+    # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
         simd_size=32)
 
     # To quantize a model using mixed-precision, create
@@ -70,13 +112,13 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     # In this example, we quantize some operations' weights
     # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
     # to quantize the operations' activations using LUT.
-    four_bits = eight_bits.clone_and_edit(weights_n_bits=4,
-                                          simd_size=eight_bits.simd_size*2)
-    two_bits = eight_bits.clone_and_edit(weights_n_bits=2,
-                                         simd_size=eight_bits.simd_size*4)
-    mixed_precision_cfg_list = [eight_bits, four_bits, two_bits]
+    four_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}},
+                                                 simd_size=linear_eight_bits.simd_size * 2)
+    two_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}},
+                                                simd_size=linear_eight_bits.simd_size * 4)
+    mixed_precision_cfg_list = [linear_eight_bits, four_bits, two_bits]
 
-    return eight_bits, mixed_precision_cfg_list
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
 
 
 def generate_tp_model(default_config: OpQuantizationConfig,
@@ -118,10 +160,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # Otherwise, it will be a configure-less set (used in fusing):
 
     # May suit for operations like: Dropout, Reshape, etc.
+    default_qco = tp.get_default_quantization_config_options()
     tp.OperatorsSet("NoQuantization",
-                    tp.get_default_quantization_config_options().clone_and_edit(
-                        enable_weights_quantization=False,
-                        enable_activation_quantization=False))
+                    default_qco.clone_and_edit(enable_activation_quantization=False)
+                    .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
     mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
@@ -14,7 +14,11 @@
 # ==============================================================================
 import tensorflow as tf
 from packaging import version
+
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.constants import FOUND_SONY_CUSTOM_LAYERS
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
+    KERAS_KERNEL, BIAS_ATTR, BIAS
 
 if FOUND_SONY_CUSTOM_LAYERS:
     from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess
@@ -86,13 +90,26 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
     with keras_tpc:
         tp.OperationsSetToLayers("NoQuantization", no_quant_list)
 
-        tp.OperationsSetToLayers("Conv", [Conv2D,
-                                          DepthwiseConv2D,
-                                          Conv2DTranspose,
-                                          tf.nn.conv2d,
-                                          tf.nn.depthwise_conv2d,
-                                          tf.nn.conv2d_transpose])
-        tp.OperationsSetToLayers("FullyConnected", [Dense])
+        tp.OperationsSetToLayers("Conv",
+                                 [Conv2D,
+                                  DepthwiseConv2D,
+                                  Conv2DTranspose,
+                                  tf.nn.conv2d,
+                                  tf.nn.depthwise_conv2d,
+                                  tf.nn.conv2d_transpose],
+                                 # we provide attributes mapping that maps each layer type in the operations set
+                                 # that has weights attributes with provided quantization config (in the tp model) to
+                                 # its framework-specific attribute name.
+                                 # note that a DefaultDict should be provided if not all the layer types in the
+                                 # operation set are provided separately in the mapping.
+                                 attr_mapping={
+                                     KERNEL_ATTR: DefaultDict({
+                                         DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL,
+                                         tf.nn.depthwise_conv2d: KERAS_DEPTHWISE_KERNEL}, default_value=KERAS_KERNEL),
+                                     BIAS_ATTR: DefaultDict(default_value=BIAS)})
+        tp.OperationsSetToLayers("FullyConnected", [Dense],
+                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
+                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
         tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu,
                                              tf.nn.relu6,
                                              tf.nn.leaky_relu,
@@ -23,6 +23,9 @@ from torch.nn import Dropout, Flatten, Hardtanh
 from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
 from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
 
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
+    BIAS
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1_pot.tp_model import get_tp_model
 import model_compression_toolkit as mct
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1_pot import (
@@ -53,6 +56,14 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                 name=name,
                                                 version=TPC_VERSION)
 
+    # we provide attributes mapping that maps each layer type in the operations set
+    # that has weights attributes with provided quantization config (in the tp model) to
+    # its framework-specific attribute name.
+    # note that a DefaultDict should be provided if not all the layer types in the
+    # operation set are provided separately in the mapping.
+    pytorch_linear_attr_mapping = {KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
+                                   BIAS_ATTR: DefaultDict(default_value=BIAS)}
+
     with pytorch_tpc:
         tp.OperationsSetToLayers("NoQuantization", [Dropout,
                                                     Flatten,
@@ -73,8 +84,10 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                                     gather,
                                                     topk])
 
-        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d])
-        tp.OperationsSetToLayers("FullyConnected", [Linear])
+        tp.OperationsSetToLayers("Conv", [Conv2d, ConvTranspose2d],
+                                 attr_mapping=pytorch_linear_attr_mapping)
+        tp.OperationsSetToLayers("FullyConnected", [Linear],
+                                 attr_mapping=pytorch_linear_attr_mapping)
        tp.OperationsSetToLayers("AnyReLU", [torch.relu,
                                             ReLU,
                                             ReLU6,
@@ -15,8 +15,12 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR
 from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
     TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
 
 tp = mct.target_platform
 
@@ -32,14 +36,14 @@ def get_tp_model() -> TargetPlatformModel:
     Returns: A TargetPlatformModel object.
 
     """
-    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
-    return generate_tp_model(default_config=base_config,
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
                              base_config=base_config,
                              mixed_precision_cfg_list=mixed_precision_cfg_list,
                              name='qnnpack_tp_model')
 
 
-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -48,29 +52,66 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
 
     """
+
+    # We define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
+    # We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # We define a quantization config to quantize the bias (for layers where there is a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
     # Create a quantization config. A quantization configuration defines how an operator
     # should be quantized on the modeled hardware.
     # For qnnpack backend, Pytorch uses a QConfig with torch.per_tensor_affine for
     # activations quantization and a torch.per_tensor_symmetric quantization scheme
     # for weights quantization (https://pytorch.org/docs/stable/quantization.html#natively-supported-backends):
-    eight_bits = tp.OpQuantizationConfig(
+
+    # We define a default config for operation without kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
         activation_quantization_method=tp.QuantizationMethod.UNIFORM,
-        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_n_bits=8,
-        weights_n_bits=8,
-        weights_per_channel_threshold=False,
-        enable_weights_quantization=True,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
         fixed_zero_point=None,
-        weights_multiplier_nbits=None,
         simd_size=None
     )
 
-    mixed_precision_cfg_list = [] # No mixed precision
+    mixed_precision_cfg_list = []  # No mixed precision
 
-    return eight_bits, mixed_precision_cfg_list
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
 
 
 def generate_tp_model(default_config: OpQuantizationConfig,
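The comment in this hunk models the qnnpack backend's native scheme (per_tensor_affine activations, per_tensor_symmetric weights). For comparison, that QConfig can be inspected directly through PyTorch's public quantization API; a minimal sketch, assuming a reasonably recent torch where torch.ao.quantization is available (observer details vary by version):

    from torch.ao.quantization import get_default_qconfig

    # Default qnnpack QConfig: per_tensor_affine activations and
    # per_tensor_symmetric weights, the scheme the config above models.
    qconfig = get_default_qconfig('qnnpack')
    print(qconfig.activation(), qconfig.weight())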
@@ -15,6 +15,10 @@
 import tensorflow as tf
 
 from packaging import version
+
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
+    KERAS_DEPTHWISE_KERNEL, BIAS
 from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.v1 import __version__ as TPC_VERSION
 
 if version.parse(tf.__version__) >= version.parse("2.13"):
@@ -53,14 +57,27 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
                                               version=TPC_VERSION)
 
     with keras_tpc:
-        tp.OperationsSetToLayers("Conv", [Conv2D,
-                                          DepthwiseConv2D,
-                                          Conv2DTranspose,
-                                          tf.nn.conv2d,
-                                          tf.nn.depthwise_conv2d,
-                                          tf.nn.conv2d_transpose])
-
-        tp.OperationsSetToLayers("Linear", [Dense])
+        tp.OperationsSetToLayers("Conv",
+                                 [Conv2D,
+                                  DepthwiseConv2D,
+                                  Conv2DTranspose,
+                                  tf.nn.conv2d,
+                                  tf.nn.depthwise_conv2d,
+                                  tf.nn.conv2d_transpose],
+                                 # we provide attributes mapping that maps each layer type in the operations set
+                                 # that has weights attributes with provided quantization config (in the tp model) to
+                                 # its framework-specific attribute name.
+                                 # note that a DefaultDict should be provided if not all the layer types in the
+                                 # operation set are provided separately in the mapping.
+                                 attr_mapping={
+                                     KERNEL_ATTR: DefaultDict({
+                                         DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL,
+                                         tf.nn.depthwise_conv2d: KERAS_DEPTHWISE_KERNEL}, default_value=KERAS_KERNEL),
+                                     BIAS_ATTR: DefaultDict(default_value=BIAS)})
+
+        tp.OperationsSetToLayers("Linear", [Dense],
+                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
+                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
 
         tp.OperationsSetToLayers("BatchNorm", [BatchNormalization,
                                                tf.nn.batch_normalization])