mct-nightly 1.11.0.20240130.post401__py3-none-any.whl → 1.11.0.20240201.post434__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/METADATA +1 -1
  2. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/RECORD +44 -44
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +1 -0
  5. model_compression_toolkit/core/__init__.py +0 -1
  6. model_compression_toolkit/core/common/framework_info.py +1 -2
  7. model_compression_toolkit/core/common/graph/base_graph.py +1 -1
  8. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +4 -2
  9. model_compression_toolkit/core/common/quantization/node_quantization_config.py +11 -6
  10. model_compression_toolkit/core/common/quantization/quantization_config.py +5 -13
  11. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +1 -1
  12. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +40 -17
  13. model_compression_toolkit/core/common/substitutions/scale_equalization.py +1 -1
  14. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  15. model_compression_toolkit/core/pytorch/default_framework_info.py +2 -2
  16. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +14 -7
  17. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +4 -1
  18. model_compression_toolkit/gptq/common/gptq_config.py +1 -3
  19. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  20. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  21. model_compression_toolkit/target_platform_capabilities/constants.py +18 -1
  22. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +1 -1
  23. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +175 -33
  24. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +7 -3
  25. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +11 -1
  26. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +64 -18
  27. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +24 -8
  28. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +15 -2
  29. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +64 -20
  30. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +24 -7
  31. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +15 -2
  32. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +60 -18
  33. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +24 -7
  34. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +15 -2
  35. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +52 -11
  36. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +25 -8
  37. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +15 -2
  38. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +52 -13
  39. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +12 -1
  40. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +12 -1
  41. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/LICENSE.md +0 -0
  42. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/WHEEL +0 -0
  43. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/top_level.txt +0 -0
  44. /model_compression_toolkit/{core/common/defaultdict.py → defaultdict.py} +0 -0
@@ -16,6 +16,9 @@ import torch
16
16
  from torch.nn import Conv2d, Linear, BatchNorm2d, ConvTranspose2d, Hardtanh, ReLU, ReLU6
17
17
  from torch.nn.functional import relu, relu6, hardtanh
18
18
 
19
+ from model_compression_toolkit.defaultdict import DefaultDict
20
+ from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
21
+ BIAS
19
22
  from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.v1.tp_model import get_tp_model
20
23
  import model_compression_toolkit as mct
21
24
  from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.v1 import __version__ as TPC_VERSION
@@ -45,13 +48,23 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
45
48
  name=name,
46
49
  version=TPC_VERSION)
47
50
 
51
+ # we provide attributes mapping that maps each layer type in the operations set
52
+ # that has weights attributes with provided quantization config (in the tp model) to
53
+ # its framework-specific attribute name.
54
+ # note that a DefaultDict should be provided if not all the layer types in the
55
+ # operation set are provided separately in the mapping.
56
+ pytorch_linear_attr_mapping = {KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
57
+ BIAS_ATTR: DefaultDict(default_value=BIAS)}
58
+
48
59
  with pytorch_tpc:
49
60
  tp.OperationsSetToLayers("Conv", [Conv2d,
50
61
  torch.nn.functional.conv2d,
51
62
  ConvTranspose2d,
52
- torch.nn.functional.conv_transpose2d])
63
+ torch.nn.functional.conv_transpose2d],
64
+ attr_mapping=pytorch_linear_attr_mapping)
53
65
 
54
- tp.OperationsSetToLayers("Linear", [Linear])
66
+ tp.OperationsSetToLayers("Linear", [Linear],
67
+ attr_mapping=pytorch_linear_attr_mapping)
55
68
 
56
69
  tp.OperationsSetToLayers("BatchNorm", [BatchNorm2d])
57
70
 
@@ -15,10 +15,12 @@
15
15
  from typing import List, Tuple
16
16
 
17
17
  import model_compression_toolkit as mct
18
+ from model_compression_toolkit.constants import FLOAT_BITWIDTH
19
+ from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR
18
20
  from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
19
21
  TargetPlatformModel
20
22
  from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
21
- QuantizationMethod
23
+ QuantizationMethod, AttributeQuantizationConfig
22
24
 
23
25
  tp = mct.target_platform
24
26
 
@@ -34,14 +36,14 @@ def get_tp_model() -> TargetPlatformModel:
34
36
  Returns: A TargetPlatformModel object.
35
37
 
36
38
  """
37
- base_config, mixed_precision_cfg_list = get_op_quantization_configs()
38
- return generate_tp_model(default_config=base_config,
39
+ base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
40
+ return generate_tp_model(default_config=default_config,
39
41
  base_config=base_config,
40
42
  mixed_precision_cfg_list=mixed_precision_cfg_list,
41
43
  name='tflite_tp_model')
42
44
 
43
45
 
44
- def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
46
+ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
45
47
  """
46
48
  Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
47
49
  In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -50,27 +52,64 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
50
52
  Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
51
53
 
52
54
  """
55
+
56
+ # We define a default quantization config for all non-specified weights attributes.
57
+ default_weight_attr_config = AttributeQuantizationConfig(
58
+ weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
59
+ weights_n_bits=8,
60
+ weights_per_channel_threshold=False,
61
+ enable_weights_quantization=False,
62
+ lut_values_bitwidth=None)
63
+
64
+ # We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
65
+ kernel_base_config = AttributeQuantizationConfig(
66
+ weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
67
+ weights_n_bits=8,
68
+ weights_per_channel_threshold=True,
69
+ enable_weights_quantization=True,
70
+ lut_values_bitwidth=None)
71
+
72
+ # We define a quantization config to quantize the bias (for layers where there is a bias attribute).
73
+ bias_config = AttributeQuantizationConfig(
74
+ weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
75
+ weights_n_bits=FLOAT_BITWIDTH,
76
+ weights_per_channel_threshold=False,
77
+ enable_weights_quantization=False,
78
+ lut_values_bitwidth=None)
79
+
53
80
  # Create a quantization config.
54
81
  # A quantization configuration defines how an operator
55
82
  # should be quantized on the modeled hardware:
56
- eight_bits = tp.OpQuantizationConfig(
83
+
84
+ # We define a default config for operation without kernel attribute.
85
+ # This is the default config that should be used for non-linear operations.
86
+ eight_bits_default = tp.OpQuantizationConfig(
87
+ default_weight_attr_config=default_weight_attr_config,
88
+ attr_weights_configs_mapping={},
89
+ activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
90
+ activation_n_bits=8,
91
+ enable_activation_quantization=True,
92
+ quantization_preserving=False,
93
+ fixed_scale=None,
94
+ fixed_zero_point=None,
95
+ simd_size=32)
96
+
97
+ # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
98
+ linear_eight_bits = tp.OpQuantizationConfig(
57
99
  activation_quantization_method=QuantizationMethod.UNIFORM,
58
- weights_quantization_method=QuantizationMethod.SYMMETRIC,
100
+ default_weight_attr_config=default_weight_attr_config,
101
+ attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
59
102
  activation_n_bits=8,
60
- weights_n_bits=8,
61
- weights_per_channel_threshold=True,
62
- enable_weights_quantization=True,
63
103
  enable_activation_quantization=True,
64
104
  quantization_preserving=False,
65
105
  fixed_scale=None,
66
106
  fixed_zero_point=None,
67
- weights_multiplier_nbits=None,
68
107
  simd_size=None
69
108
  )
70
109
 
71
110
  mixed_precision_cfg_list = [] # No mixed precision
72
111
 
73
- return eight_bits, mixed_precision_cfg_list
112
+ return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
74
113
 
75
114
 
76
115
  def generate_tp_model(default_config: OpQuantizationConfig,
@@ -112,9 +151,9 @@ def generate_tp_model(default_config: OpQuantizationConfig,
112
151
  tp.get_default_quantization_config_options().clone_and_edit(
113
152
  quantization_preserving=True))
114
153
 
154
+ fc_qco = tp.get_default_quantization_config_options()
115
155
  fc = tp.OperatorsSet("FullyConnected",
116
- tp.get_default_quantization_config_options().clone_and_edit(
117
- weights_per_channel_threshold=False))
156
+ fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))
118
157
 
119
158
  tp.OperatorsSet("L2Normalization",
120
159
  tp.get_default_quantization_config_options().clone_and_edit(
@@ -15,6 +15,9 @@
15
15
  import tensorflow as tf
16
16
  from packaging import version
17
17
 
18
+ from model_compression_toolkit.defaultdict import DefaultDict
19
+ from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, BIAS
20
+
18
21
  if version.parse(tf.__version__) >= version.parse("2.13"):
19
22
  from keras.src.layers import Conv2D, Dense, Reshape, ZeroPadding2D, AveragePooling2D, Activation, DepthwiseConv2D, \
20
23
  MaxPooling2D, ReLU, Add, Softmax, Concatenate, Multiply, Maximum, Minimum, BatchNormalization
@@ -85,7 +88,15 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
85
88
  tf.slice,
86
89
  SlicingOpLambda])
87
90
 
88
- tp.OperationsSetToLayers("FullyConnected", [Dense])
91
+ tp.OperationsSetToLayers("FullyConnected", [Dense],
92
+ # we provide attributes mapping that maps each layer type in the operations set
93
+ # that has weights attributes with provided quantization config (in the tp model) to
94
+ # its framework-specific attribute name.
95
+ # note that a DefaultDict should be provided if not all the layer types in the
96
+ # operation set are provided separately in the mapping.
97
+ attr_mapping={
98
+ KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
99
+ BIAS_ATTR: DefaultDict(default_value=BIAS)})
89
100
  tp.OperationsSetToLayers("L2Normalization", [tf.math.l2_normalize])
90
101
  tp.OperationsSetToLayers("LogSoftmax", [tf.nn.log_softmax])
91
102
  tp.OperationsSetToLayers("Tanh", [tf.nn.tanh, tp.LayerFilterParams(Activation, activation="tanh")])
@@ -15,6 +15,10 @@
15
15
  import torch
16
16
  from torch.nn import AvgPool2d, MaxPool2d
17
17
  from torch.nn.functional import avg_pool2d, max_pool2d, interpolate
18
+
19
+ from model_compression_toolkit.defaultdict import DefaultDict
20
+ from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
21
+ BIAS
18
22
  from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import Eq
19
23
 
20
24
  from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.tp_model import get_tp_model
@@ -68,7 +72,14 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
68
72
  torch.select,
69
73
  torch.unbind])
70
74
 
71
- tp.OperationsSetToLayers("FullyConnected", [torch.nn.Linear, torch.nn.functional.linear])
75
+ tp.OperationsSetToLayers("FullyConnected", [torch.nn.Linear, torch.nn.functional.linear],
76
+ # we provide attributes mapping that maps each layer type in the operations set
77
+ # that has weights attributes with provided quantization config (in the tp model) to
78
+ # its framework-specific attribute name.
79
+ # note that a DefaultDict should be provided if not all the layer types in the
80
+ # operation set are provided separately in the mapping.
81
+ attr_mapping={KERNEL_ATTR: DefaultDict(default_value=PYTORCH_KERNEL),
82
+ BIAS_ATTR: DefaultDict(default_value=BIAS)})
72
83
  tp.OperationsSetToLayers("L2Normalization",
73
84
  [tp.LayerFilterParams(torch.nn.functional.normalize, Eq('p', 2) | Eq('p', None))])
74
85
  tp.OperationsSetToLayers("LogSoftmax", [torch.nn.LogSoftmax])