mct-nightly 1.10.0.20231017.post414__py3-none-any.whl → 1.10.0.20231019.post424__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/METADATA +1 -1
  2. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/RECORD +22 -15
  3. model_compression_toolkit/core/common/framework_implementation.py +0 -12
  4. model_compression_toolkit/core/common/hessian/hessian_info_service.py +17 -1
  5. model_compression_toolkit/core/keras/constants.py +7 -0
  6. model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +127 -0
  7. model_compression_toolkit/core/keras/keras_implementation.py +3 -17
  8. model_compression_toolkit/core/pytorch/pytorch_implementation.py +0 -15
  9. model_compression_toolkit/qat/common/qat_config.py +4 -1
  10. model_compression_toolkit/qat/keras/quantizer/__init__.py +2 -0
  11. model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py +14 -0
  12. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +254 -0
  13. model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +250 -0
  14. model_compression_toolkit/qat/keras/quantizer/quant_utils.py +17 -0
  15. model_compression_toolkit/qat/pytorch/quantizer/__init__.py +3 -1
  16. model_compression_toolkit/qat/pytorch/quantizer/lsq/__init__.py +14 -0
  17. model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +228 -0
  18. model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +223 -0
  19. model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +17 -4
  20. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/LICENSE.md +0 -0
  21. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/WHEEL +0 -0
  22. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,250 @@
1
+ # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ import numpy as np
16
+ import tensorflow as tf
17
+ from tensorflow.python.framework.tensor_shape import TensorShape
18
+ from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX
19
+ from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
20
+ from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
21
+ from model_compression_toolkit.qat import TrainingMethod
22
+
23
+ from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget
24
+ from mct_quantizers.keras.quantizers import \
25
+ BaseKerasInferableQuantizer, WeightsUniformInferableQuantizer, ActivationUniformInferableQuantizer
26
+
27
+ from model_compression_toolkit import constants as C
28
+
29
+ from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer
30
+ from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \
31
+ TrainableQuantizerActivationConfig
32
+ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
33
+ from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
34
+ from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale, adjust_range_to_include_zero
35
+
36
+
37
def uniform_lsq_quantizer(x: tf.Tensor,
                          min_range: tf.Tensor,
                          max_range: tf.Tensor,
                          num_bits: int,
                          min_int: int,
                          max_int:int,
                          scale_factor: float) -> tf.Tensor:
    """
    Quantize a tensor on a uniform grid following the LSQ algorithm:
    https://arxiv.org/pdf/1902.08153.pdf

    Args:
        x: input to quantize
        min_range: min range of quantization values
        max_range: max range of quantization values
        num_bits: number of bits for quantization
        min_int: min clipping integer value
        max_int: max clipping integer value
        scale_factor: grad scale of LSQ algorithm (forwarded to grad_scale for the step size)

    Returns:
        A quantized tensor
    """
    # The range is first passed through adjust_range_to_include_zero; the step size
    # is derived from the adjusted range.
    range_lo, range_hi = adjust_range_to_include_zero(min_range, max_range, num_bits)
    n_steps = 2 ** num_bits - 1
    step = grad_scale((range_hi - range_lo) / n_steps, scale_factor)

    # Map to grid indices, round with ste_round, then clip to [min_int, max_int].
    grid_index = ste_round((x - range_lo) / step)
    grid_index = tf.math.minimum(tf.math.maximum(grid_index, min_int), max_int)

    # Map the clipped indices back to the value domain.
    return step * grid_index + range_lo
64
+
65
+
66
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
                quantization_method=[QuantizationMethod.UNIFORM],
                identifier=TrainingMethod.LSQ)
class LSQUniformWeightQATQuantizer(BaseKerasQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer's weights.

    The min/max of the quantization range are stored as trainable layer
    variables and the weights are quantized with uniform_lsq_quantizer.
    """

    def __init__(self, quantization_config: TrainableQuantizerWeightsConfig):
        """
        Initialize a LSQUniformWeightQATQuantizer object with parameters to use
        for the quantization.

        Args:
            quantization_config: a trainable quantizer config class with attributes for the quantization.

        """
        super().__init__(quantization_config)
        self.num_bits = self.quantization_config.weights_n_bits
        self.per_channel = self.quantization_config.weights_per_channel_threshold
        self.channel_axis = self.quantization_config.weights_channels_axis
        max_values = np.array(quantization_config.weights_quantization_params[RANGE_MAX])
        min_values = np.array(quantization_config.weights_quantization_params[RANGE_MIN])
        # Original shape of the range array; its rank is reported as input_rank in convert2inferable.
        self.min_max_shape = np.asarray(max_values).shape
        # Per-channel: flattened 1-D arrays. Per-tensor: plain Python floats.
        self.max_values = np.reshape(max_values, [-1]) if self.per_channel else float(max_values)
        self.min_values = np.reshape(min_values, [-1]) if self.per_channel else float(min_values)
        self.min_int = 0
        self.max_int = 2**self.num_bits - 1
        # np.size works for both the per-channel array and the per-tensor Python float
        # (a float has no `.size` attribute, so `self.max_values.size` would raise).
        self.scale_factor = 1.0 / np.sqrt(self.max_int * np.size(self.max_values))

    def initialize_quantization(self,
                                tensor_shape: TensorShape,
                                name: str,
                                layer: KerasTrainableQuantizationWrapper):
        """
        Add quantizer parameters to the quantizer parameters dictionary

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        # The constant initializers are placeholders; the real range values are assigned right after.
        fq_min = layer.add_weight(
            name + FQ_MIN,
            shape=len(self.min_values) if self.per_channel else (),
            initializer=tf.keras.initializers.Constant(-1.0),
            trainable=True)
        fq_min.assign(self.min_values)

        fq_max = layer.add_weight(
            name + FQ_MAX,
            shape=len(self.max_values) if self.per_channel else (),
            initializer=tf.keras.initializers.Constant(1.0),
            trainable=True)
        fq_max.assign(self.max_values)

        # save the quantizer added parameters for later calculations
        self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS)
        self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS)

    def __call__(self, inputs: tf.Tensor,
                 training: bool):
        """
        Quantize a tensor.
        Args:
            inputs: Input tensor to quantize.
            training: Whether the graph is in training mode.

        Returns:
            The quantized tensor.
        """

        min_range = self.get_quantizer_variable(FQ_MIN)
        max_range = self.get_quantizer_variable(FQ_MAX)
        # NOTE(review): in per-channel mode the range variables are 1-D, so this presumably
        # relies on the channel axis being the last axis of `inputs` - confirm.
        q_tensor = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, self.scale_factor)
        return q_tensor

    def convert2inferable(self) -> BaseKerasInferableQuantizer:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            BaseKerasInferableQuantizer object.
        """
        # The learned range is passed through fix_range_to_include_zero before export.
        min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(),
                                                         self.get_quantizer_variable(FQ_MAX).numpy(),
                                                         self.num_bits)
        return WeightsUniformInferableQuantizer(num_bits=self.num_bits,
                                                min_range=list(min_range.flatten()),
                                                max_range=list(max_range.flatten()),
                                                per_channel=self.per_channel,
                                                channel_axis=self.channel_axis,
                                                input_rank=len(self.min_max_shape))
160
+
161
+
162
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
                quantization_method=[QuantizationMethod.UNIFORM],
                identifier=TrainingMethod.LSQ)
class LSQUniformActivationQATQuantizer(BaseKerasQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer activations.

    A single (scalar) min and max of the quantization range are kept as
    trainable layer variables and applied through uniform_lsq_quantizer.
    """

    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
        """
        Build a LSQUniformActivationQATQuantizer from the given configuration.

        Args:
            quantization_config: trainable quantizer config class
        """
        super().__init__(quantization_config)

        act_params = quantization_config.activation_quantization_params
        self.num_bits = quantization_config.activation_n_bits
        self.min_range = np.array(act_params[C.RANGE_MIN])
        self.max_range = np.array(act_params[C.RANGE_MAX])
        # Unsigned integer grid: indices run from 0 to 2**num_bits - 1.
        self.min_int = 0
        self.max_int = 2**self.num_bits - 1

    def initialize_quantization(self,
                                tensor_shape: TensorShape,
                                name: str,
                                layer: KerasTrainableQuantizationWrapper):
        """
        Create the trainable range variables on the layer and register them
        in the quantizer parameters dictionary.

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        # (variable key, placeholder initializer value, value assigned after creation)
        range_specs = ((FQ_MIN, -1.0, self.min_range),
                       (FQ_MAX, 1.0, self.max_range))
        for var_key, placeholder, start_value in range_specs:
            range_var = layer.add_weight(
                name + var_key,
                shape=(),
                initializer=tf.keras.initializers.Constant(placeholder),
                trainable=True)
            range_var.assign(start_value)
            # keep a handle to the variable for later calculations
            self.add_quantizer_variable(var_key, range_var, VariableGroup.QPARAMS)

    def __call__(self,
                 inputs: tf.Tensor,
                 training: bool):
        """
        Quantize a tensor.

        Args:
            inputs: Input tensor to quantize.
            training: Whether the graph is in training mode.

        Returns:
            The quantized tensor.
        """
        learned_min = self.get_quantizer_variable(FQ_MIN)
        learned_max = self.get_quantizer_variable(FQ_MAX)
        # The gradient scale depends on the size of the last axis of the input.
        last_dim = inputs.shape[-1]
        lsq_scale = 1.0 / np.sqrt(self.max_int * last_dim)
        return uniform_lsq_quantizer(inputs, learned_min, learned_max, self.num_bits, self.min_int, self.max_int, lsq_scale)

    def convert2inferable(self) -> BaseKerasInferableQuantizer:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            BaseKerasInferableQuantizer object.
        """
        learned_min = self.get_quantizer_variable(FQ_MIN).numpy()
        learned_max = self.get_quantizer_variable(FQ_MAX).numpy()
        final_min, final_max = fix_range_to_include_zero(learned_min, learned_max, self.num_bits)
        # Activation quantization is per-tensor only, so the min/max are passed
        # as lists with a len of 1.
        return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
                                                   min_range=[final_min],
                                                   max_range=[final_max])
@@ -17,6 +17,23 @@ import tensorflow as tf
17
17
  from typing import Tuple
18
18
 
19
19
 
20
def ste_round(x: tf.Tensor) -> tf.Tensor:
    """
    Return the rounded values of a tensor.

    The rounding residual (round(x) - x) is wrapped in tf.stop_gradient and
    added back to x, so the gradient flows only through the plain x term.
    """
    return x + tf.stop_gradient(tf.math.round(x) - x)
26
+
27
+
28
def grad_scale(x: tf.Tensor, scale=1.0) -> tf.Tensor:
    """
    Return x in forward and x*scale in backward (for scaling the gradients).

    Args:
        x: Input tensor.
        scale: Factor applied to the gradient of x.

    Returns:
        scale * x plus a gradient-stopped correction term (x - scale * x).
    """
    scaled = scale * x
    # The correction restores the forward value; it carries no gradient.
    return scaled + tf.stop_gradient(x - scaled)
35
+
36
+
20
37
  def adjust_range_to_include_zero(range_min: tf.Tensor,
21
38
  range_max: tf.Tensor,
22
39
  n_bits: int) -> Tuple[tf.Tensor, tf.Tensor]:
@@ -14,4 +14,6 @@
14
14
  # ==============================================================================
15
15
 
16
16
  import model_compression_toolkit.qat.pytorch.quantizer.ste_rounding.symmetric_ste
17
- import model_compression_toolkit.qat.pytorch.quantizer.ste_rounding.uniform_ste
17
+ import model_compression_toolkit.qat.pytorch.quantizer.ste_rounding.uniform_ste
18
+ import model_compression_toolkit.qat.pytorch.quantizer.lsq.symmetric_lsq
19
+ import model_compression_toolkit.qat.pytorch.quantizer.lsq.uniform_lsq
@@ -0,0 +1,14 @@
1
+ # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
@@ -0,0 +1,228 @@
1
+ # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ from typing import Union
16
+
17
+ import numpy as np
18
+ import torch
19
+ import torch.nn as nn
20
+
21
+ from model_compression_toolkit.qat import TrainingMethod
22
+ from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
23
+ from mct_quantizers import PytorchQuantizationWrapper
24
+ from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
25
+ from model_compression_toolkit import constants as C
26
+ from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_quantizer import BasePytorchQATTrainableQuantizer
27
+ from mct_quantizers.common.base_inferable_quantizer import mark_quantizer, QuantizationTarget
28
+
29
+ from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
30
+ from model_compression_toolkit.qat.pytorch.quantizer.quantizer_utils import ste_round, grad_scale
31
+ from mct_quantizers.pytorch.quantizers import \
32
+ WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, ActivationPOTInferableQuantizer, \
33
+ ActivationSymmetricInferableQuantizer
34
+ from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
35
+ TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
36
+ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
37
+
38
+
39
def symmetric_lsq_quantizer(x: nn.Parameter,
                            thresholds: nn.Parameter,
                            num_bits: int,
                            sign: bool,
                            min_int: int,
                            max_int: int,
                            scale_factor: float) -> Union[nn.Parameter, torch.Tensor]:
    """
    Quantize a tensor symmetrically following the LSQ algorithm:
    https://arxiv.org/pdf/1902.08153.pdf

    Args:
        x: input to quantize
        thresholds: thresholds of quantization levels
        num_bits: number of bits for quantization
        sign: whether x is signed or not
        min_int: min clipping integer value
        max_int: max clipping integer value
        scale_factor: grad scale of LSQ algorithm (forwarded to grad_scale for the step size)

    Returns:
        A quantized tensor
    """
    # One bit is reserved for the sign when the input is signed.
    n_pos_bits = num_bits - int(sign)
    step = grad_scale(thresholds / (2 ** n_pos_bits), scale_factor)

    # Round to integer levels with ste_round, then clip to [min_int, max_int].
    levels = torch.clip(ste_round(x / step), min=min_int, max=max_int)

    # Map the clipped levels back to the value domain.
    return step * levels
65
+
66
+
67
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
                identifier=TrainingMethod.LSQ)
class LSQWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer's weights.

    The thresholds are registered as a trainable layer parameter and the
    weights are quantized with symmetric_lsq_quantizer.
    """

    def __init__(self, quantization_config: TrainableQuantizerWeightsConfig):
        """
        Build a LSQWeightQATQuantizer from the given configuration.

        Args:
            quantization_config: trainable quantizer config class
        """
        super().__init__(quantization_config)
        self.power_of_two = quantization_config.weights_quantization_method == QuantizationMethod.POWER_OF_TWO

        initial_thresholds = np.array(quantization_config.weights_quantization_params[C.THRESHOLD])
        if self.power_of_two:
            # Floor the thresholds at C.MIN_THRESHOLD, then round each one up to a power of two.
            floored = np.maximum(initial_thresholds, C.MIN_THRESHOLD)
            initial_thresholds = np.power(2.0, np.ceil(np.log2(floored)))
        self.threshold_values = initial_thresholds

        self.num_bits = self.quantization_config.weights_n_bits
        signed_flag = int(C.WEIGHTS_SIGNED)
        n_pos_bits = self.num_bits - signed_flag
        self.min_int = -signed_flag * (2 ** n_pos_bits)
        self.max_int = 2 ** n_pos_bits - 1
        # Gradient scale: 1 / sqrt(max_int * number of threshold entries).
        self.scale_factor = 1.0 / np.sqrt(self.max_int * self.threshold_values.size)

    def initialize_quantization(self,
                                tensor_shape: torch.Size,
                                name: str,
                                layer: PytorchQuantizationWrapper):
        """
        Register the trainable threshold parameter on the layer and record it
        in the quantizer parameters dictionary.

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        param_name = name + "_" + THRESHOLD_TENSOR

        # Add threshold variables to layer.
        threshold_param = nn.Parameter(to_torch_tensor(self.threshold_values), requires_grad=True)
        layer.register_parameter(param_name, threshold_param)

        # save the quantizer added parameters for later calculations
        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(param_name), VariableGroup.QPARAMS)

    def __call__(self,
                 inputs: nn.Parameter,
                 training: bool) -> nn.Parameter:
        """
        Quantize a tensor.

        Args:
            inputs: Input tensor to quantize.
            training: whether in training mode or not

        Returns:
            quantized tensor
        """
        learned_thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
        return symmetric_lsq_quantizer(inputs, learned_thresholds, self.num_bits, C.WEIGHTS_SIGNED, self.min_int, self.max_int, self.scale_factor)

    def convert2inferable(self) -> Union[WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer]:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            A pytorch inferable quantizer object.
        """
        learned = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy().flatten()

        if not self.power_of_two:
            return WeightsSymmetricInferableQuantizer(num_bits=self.num_bits,
                                                      threshold=learned.tolist(),
                                                      per_channel=self.quantization_config.weights_per_channel_threshold,
                                                      channel_axis=self.quantization_config.weights_channels_axis)

        # Power-of-two export: round each learned threshold up to a power of two.
        pot_learned = 2 ** np.ceil(np.log2(learned))
        return WeightsPOTInferableQuantizer(num_bits=self.num_bits,
                                            threshold=pot_learned.tolist(),
                                            per_channel=self.quantization_config.weights_per_channel_threshold,
                                            channel_axis=self.quantization_config.weights_channels_axis)
148
+
149
+
150
+
151
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
                identifier=TrainingMethod.LSQ)
class LSQActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer activations.

    A single threshold is registered as a trainable layer parameter and the
    activations are quantized with symmetric_lsq_quantizer.
    """

    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
        """
        Build a LSQActivationQATQuantizer for symmetric or power of two quantization.

        Args:
            quantization_config: trainable quantizer config class
        """
        super().__init__(quantization_config)
        act_params = quantization_config.activation_quantization_params
        self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
        self.sign = act_params['is_signed']
        # The threshold is wrapped in a list, giving a one-element array.
        self.threshold_values = np.array([act_params[C.THRESHOLD]])
        self.num_bits = quantization_config.activation_n_bits

        signed_flag = int(self.sign)
        n_pos_bits = self.num_bits - signed_flag
        self.min_int = -signed_flag * (2 ** n_pos_bits)
        self.max_int = (2 ** n_pos_bits) - 1

    def initialize_quantization(self,
                                tensor_shape: torch.Size,
                                name: str,
                                layer: PytorchQuantizationWrapper):
        """
        Register the trainable threshold parameter on the layer and record it
        in the quantizer parameters dictionary.

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        threshold_param = nn.Parameter(to_torch_tensor(self.threshold_values), requires_grad=True)
        layer.register_parameter(name, threshold_param)

        # save the quantizer added parameters for later calculations
        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name), VariableGroup.QPARAMS)

    def __call__(self,
                 inputs: torch.Tensor,
                 training: bool = True) -> torch.Tensor:
        """
        Quantize a tensor.

        Args:
            inputs: Input tensor to quantize.
            training: Whether the graph is in training mode.

        Returns:
            The quantized tensor.
        """
        learned_thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
        # The gradient scale depends on the size of dimension 1 of the input.
        dim1_size = inputs.shape[1]
        lsq_scale = 1.0 / np.sqrt(self.max_int * dim1_size)
        return symmetric_lsq_quantizer(inputs, learned_thresholds, self.num_bits, self.sign, self.min_int, self.max_int, lsq_scale)

    def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            A pytorch inferable quantizer object.
        """
        learned = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy()

        if not self.power_of_two:
            return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
                                                         threshold=learned.tolist(),
                                                         signed=self.sign)

        # Power-of-two export: round the learned threshold up to a power of two.
        pot_learned = np.power(2.0, np.ceil(np.log2(learned)))
        return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
                                               threshold=pot_learned.tolist(),
                                               signed=self.sign)