mct-nightly 2.2.0.20241012.448__py3-none-any.whl → 2.2.0.20241018.449__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/RECORD +22 -14
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/qat/keras/quantizer/{base_keras_qat_quantizer.py → base_keras_qat_weight_quantizer.py} +3 -13
  5. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +5 -126
  6. model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +4 -121
  7. model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +7 -6
  8. model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +4 -119
  9. model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +5 -95
  10. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py +20 -0
  11. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py +22 -0
  12. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py +14 -0
  13. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +127 -0
  14. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py +129 -0
  15. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py +14 -0
  16. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py +148 -0
  17. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py +122 -0
  18. model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +12 -10
  19. model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py +60 -0
  20. {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/LICENSE.md +0 -0
  21. {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/WHEEL +0 -0
  22. {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,122 @@
1
+ # Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ import tensorflow as tf
16
+ from tensorflow.python.framework.tensor_shape import TensorShape
17
+ from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
18
+ from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
19
+ from model_compression_toolkit.trainable_infrastructure import TrainingMethod
20
+
21
+ from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget
22
+ from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer, ActivationUniformInferableQuantizer
23
+
24
+ from model_compression_toolkit.qat.keras.quantizer.quant_utils import adjust_range_to_include_zero
25
+ from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
26
+ from model_compression_toolkit import constants as C
27
+
28
+ from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerActivationConfig
29
+ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
30
+ from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import BaseKerasActivationTrainableQuantizer
31
+
32
+
33
+ @mark_quantizer(quantization_target=QuantizationTarget.Activation,
34
+ quantization_method=[QuantizationMethod.UNIFORM],
35
+ identifier=TrainingMethod.STE)
36
+ class STEUniformActivationTrainableQuantizer(BaseKerasActivationTrainableQuantizer):
37
+ """
38
+ Trainable constrained quantizer to quantize a layer's outputs.
39
+ """
40
+
41
+ def __init__(self, quantization_config: TrainableQuantizerActivationConfig, freeze_quant_params: bool = False):
42
+ """
43
+ Initialize a STEUniformActivationTrainableQuantizer object with parameters to use
44
+ for the quantization.
45
+
46
+ Args:
47
+ quantization_config: trainable quantizer config class
48
+ freeze_quant_params: whether to freeze learnable quantization parameters. This is unused here, since there are no quantization params that are learned.
49
+
50
+ """
51
+ super().__init__(quantization_config, freeze_quant_params)
52
+
53
+ self.num_bits = quantization_config.activation_n_bits
54
+ self.min_range = quantization_config.activation_quantization_params[C.RANGE_MIN]
55
+ self.max_range = quantization_config.activation_quantization_params[C.RANGE_MAX]
56
+
57
+ def initialize_quantization(self,
58
+ tensor_shape: TensorShape,
59
+ name: str,
60
+ layer: KerasTrainableQuantizationWrapper):
61
+ """
62
+ Add quantizer parameters to the quantizer parameters dictionary
63
+
64
+ Args:
65
+ tensor_shape: tensor shape of the quantized tensor.
66
+ name: Tensor name.
67
+ layer: Layer to quantize.
68
+ """
69
+ fq_min = layer.add_weight(
70
+ name + FQ_MIN,
71
+ shape=(),
72
+ initializer=tf.keras.initializers.Constant(-1.0),
73
+ trainable=False)
74
+ fq_min.assign(self.min_range)
75
+
76
+ fq_max = layer.add_weight(
77
+ name + FQ_MAX,
78
+ shape=(),
79
+ initializer=tf.keras.initializers.Constant(1.0),
80
+ trainable=False)
81
+ fq_max.assign(self.max_range)
82
+
83
+ # save the quantizer added parameters for later calculations
84
+ self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS)
85
+ self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS)
86
+
87
+ def __call__(self,
88
+ inputs: tf.Tensor,
89
+ training: bool):
90
+ """
91
+ Quantize a tensor.
92
+ Args:
93
+ inputs: Input tensor to quantize.
94
+ training: Whether the graph is in training mode.
95
+
96
+ Returns:
97
+ The quantized tensor.
98
+ """
99
+
100
+ _min = self.get_quantizer_variable(FQ_MIN)
101
+ _max = self.get_quantizer_variable(FQ_MAX)
102
+ _min, _max = adjust_range_to_include_zero(_min, _max, self.num_bits)
103
+ q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max,
104
+ num_bits=self.num_bits)
105
+
106
+ return q_tensor
107
+
108
+ def convert2inferable(self) -> BaseKerasInferableQuantizer:
109
+ """
110
+ Convert quantizer to inferable quantizer.
111
+
112
+ Returns:
113
+ BaseKerasInferableQuantizer object.
114
+ """
115
+ min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(),
116
+ self.get_quantizer_variable(FQ_MAX).numpy(),
117
+ self.num_bits)
118
+ return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
119
+ # Activation quantization is per-tensor only - thus we pass
120
+ # the min/max as lists with a len of 1
121
+ min_range=[min_range],
122
+ max_range=[max_range])
@@ -28,16 +28,10 @@ if FOUND_TF:
28
28
  import tensorflow as tf
29
29
 
30
30
  class BaseKerasTrainableQuantizer(BaseTrainableQuantizer):
31
- def __init__(self,
32
- quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
33
- """
34
- This class is a base quantizer which validates provided quantization config and defines an abstract function which any quantizer needs to implement.
35
- This class adds to the base quantizer a get_config and from_config functions to enable loading and saving the keras model.
36
-
37
- Args:
38
- quantization_config: quantizer config class contains all the information about a quantizer configuration.
39
- """
40
- super().__init__(quantization_config)
31
+ """
32
+ This class is a base quantizer which validates provided quantization config and defines an abstract function which any quantizer needs to implement.
33
+ This class adds to the base quantizer a get_config and from_config functions to enable loading and saving the keras model.
34
+ """
41
35
 
42
36
  def get_config(self) -> Dict[str, Any]:
43
37
  """
@@ -77,6 +71,14 @@ if FOUND_TF:
77
71
  quantizer_parameter, parameter_group = parameter_dict[VAR], parameter_dict[GROUP]
78
72
  if quantizer_parameter.trainable and parameter_group == group:
79
73
  quantizer_trainable.append(quantizer_parameter)
74
+
75
+ # sanity check to catch inconsistent initialization
76
+ if self.freeze_quant_params and group == VariableGroup.QPARAMS and quantizer_trainable:
77
+ Logger.critical(
78
+ 'Found trainable quantization params despite self.freeze_quant_params=True. '
79
+ 'Quantization parameters were probably not initialized correctly in the Quantizer.'
80
+ ) # pragma: no cover
81
+
80
82
  return quantizer_trainable
81
83
 
82
84
 
@@ -16,6 +16,9 @@
16
16
  import numpy as np
17
17
  import tensorflow as tf
18
18
 
19
+ from model_compression_toolkit.qat.keras.quantizer.quant_utils import grad_scale, ste_round, \
20
+ adjust_range_to_include_zero
21
+
19
22
 
20
23
  def int_quantization_with_threshold(data: tf.Tensor,
21
24
  n_bits: int,
@@ -46,3 +49,60 @@ def int_quantization_with_threshold(data: tf.Tensor,
46
49
 
47
50
  return tf.clip_by_value((data / (threshold + eps)) * (2 ** (n_bits - int(signed))),
48
51
  clip_value_max=clip_max, clip_value_min=clip_min)
52
+
53
+
54
+ def symmetric_lsq_quantizer(x: tf.Tensor,
55
+ thresholds: tf.Tensor,
56
+ num_bits: int,
57
+ sign: bool,
58
+ min_int: int,
59
+ max_int:int,
60
+ scale_factor: float) -> tf.Tensor:
61
+ """
62
+ Symmetric quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
63
+ Args:
64
+ x: input to quantize
65
+ thresholds: thresholds of quantization levels
66
+ num_bits: number of bits for quantization
67
+ sign: whether x is signed or not
68
+ min_int: min clipping integer value
69
+ max_int: max clipping integer value
70
+ scale_factor: grad scale of LSQ algorithm
71
+ Returns:
72
+ A quantized tensor
73
+ """
74
+ delta = thresholds / (2 ** (num_bits - int(sign)))
75
+ delta_scaled = grad_scale(delta, scale_factor)
76
+ rounded = ste_round(x / delta_scaled)
77
+ clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int)
78
+ quantized = delta_scaled * clipped
79
+ return quantized
80
+
81
+
82
+ def uniform_lsq_quantizer(x: tf.Tensor,
83
+ min_range: tf.Tensor,
84
+ max_range: tf.Tensor,
85
+ num_bits: int,
86
+ min_int: int,
87
+ max_int:int,
88
+ scale_factor: float) -> tf.Tensor:
89
+ """
90
+ Uniform quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
91
+ Args:
92
+ x: input to quantize
93
+ min_range: min range of quantization values
94
+ max_range: min range of quantization values
95
+ num_bits: number of bits for quantization
96
+ min_int: min clipping integer value
97
+ max_int: max clipping integer value
98
+ scale_factor: grad scale of LSQ algorithm
99
+ Returns:
100
+ A quantized tensor
101
+ """
102
+ min_range, max_range = adjust_range_to_include_zero(min_range, max_range, num_bits)
103
+ delta = (max_range - min_range) / (2 ** num_bits - 1)
104
+ delta_scaled = grad_scale(delta, scale_factor)
105
+ rounded = ste_round((x-min_range) / delta_scaled)
106
+ clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int)
107
+ quantized = delta_scaled * clipped + min_range
108
+ return quantized