mct-nightly 1.10.0.20231017.post414__py3-none-any.whl → 1.10.0.20231019.post424__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/METADATA +1 -1
  2. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/RECORD +22 -15
  3. model_compression_toolkit/core/common/framework_implementation.py +0 -12
  4. model_compression_toolkit/core/common/hessian/hessian_info_service.py +17 -1
  5. model_compression_toolkit/core/keras/constants.py +7 -0
  6. model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +127 -0
  7. model_compression_toolkit/core/keras/keras_implementation.py +3 -17
  8. model_compression_toolkit/core/pytorch/pytorch_implementation.py +0 -15
  9. model_compression_toolkit/qat/common/qat_config.py +4 -1
  10. model_compression_toolkit/qat/keras/quantizer/__init__.py +2 -0
  11. model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py +14 -0
  12. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +254 -0
  13. model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +250 -0
  14. model_compression_toolkit/qat/keras/quantizer/quant_utils.py +17 -0
  15. model_compression_toolkit/qat/pytorch/quantizer/__init__.py +3 -1
  16. model_compression_toolkit/qat/pytorch/quantizer/lsq/__init__.py +14 -0
  17. model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +228 -0
  18. model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +223 -0
  19. model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +17 -4
  20. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/LICENSE.md +0 -0
  21. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/WHEEL +0 -0
  22. {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/top_level.txt +0 -0
model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py (new file)
@@ -0,0 +1,223 @@
+ # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ from typing import Union
+ import numpy as np
+ import torch
+ import torch.nn as nn
+
+ from model_compression_toolkit.constants import RANGE_MAX, RANGE_MIN
+ from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
+
+ from model_compression_toolkit.qat import TrainingMethod
+ from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
+ from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
+ from model_compression_toolkit import constants as C
+
+ from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_quantizer import BasePytorchQATTrainableQuantizer
+ from mct_quantizers import mark_quantizer
+ from model_compression_toolkit.qat.pytorch.quantizer.quantizer_utils import ste_round, grad_scale
+ from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
+ from mct_quantizers.pytorch.quantizers import \
+     WeightsUniformInferableQuantizer, ActivationUniformInferableQuantizer
+ from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
+     TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
+ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
+ from model_compression_toolkit.qat.pytorch.quantizer.quantizer_utils import adjust_range_to_include_zero
+ from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
+
+
+
+ def uniform_lsq_quantizer(x: nn.Parameter,
+                           min_range: nn.Parameter,
+                           max_range: nn.Parameter,
+                           num_bits: int,
+                           min_int: int,
+                           max_int: int,
+                           scale_factor: float) -> Union[nn.Parameter, torch.Tensor]:
+     """
+     Uniform quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
+     Args:
+         x: input to quantize
+         min_range: min range of quantization values
+         max_range: max range of quantization values
+         num_bits: number of bits for quantization
+         min_int: min clipping integer value
+         max_int: max clipping integer value
+         scale_factor: grad scale of LSQ algorithm
+     Returns:
+         A quantized tensor
+     """
+     a, b = adjust_range_to_include_zero(min_range, max_range, num_bits)
+     delta = (b - a) / (2 ** num_bits - 1)
+     delta_scaled = grad_scale(delta, scale_factor)
+     rounded = ste_round((x - a) / delta_scaled)
+     clipped = torch.clip(rounded, min=min_int, max=max_int)
+     quantized = delta_scaled * clipped + a
+     return quantized
+
+
+ @mark_quantizer(quantization_target=QuantizationTarget.Weights,
+                 quantization_method=[QuantizationMethod.UNIFORM],
+                 identifier=TrainingMethod.LSQ)
+ class LSQUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
+     """
+     Trainable constrained quantizer to quantize layer's weights.
+     """
+
+     def __init__(self, quantization_config: TrainableQuantizerWeightsConfig):
+         """
+         Initialize a LSQUniformWeightQATQuantizer object with parameters to use
+         for the quantization.
+
+         Args:
+             quantization_config: trainable quantizer config class
+         """
+         super().__init__(quantization_config)
+         self.num_bits = self.quantization_config.weights_n_bits
+         self.min_int = 0
+         self.max_int = 2 ** self.num_bits - 1
+         self.min_values = np.array(quantization_config.weights_quantization_params[RANGE_MIN])
+         self.max_values = np.array(quantization_config.weights_quantization_params[RANGE_MAX])
+         self.scale_factor = 1.0 / np.sqrt(self.max_int * self.min_values.size)
+
+     def initialize_quantization(self,
+                                 tensor_shape: torch.Size,
+                                 name: str,
+                                 layer: PytorchQuantizationWrapper):
+         """
+         Add quantizer parameters to the quantizer parameters dictionary
+
+         Args:
+             tensor_shape: tensor shape of the quantized tensor.
+             name: Tensor name.
+             layer: Layer to quantize.
+         """
+
+         # Add min and max variables to layer.
+         layer.register_parameter(name+"_"+FQ_MIN, nn.Parameter(to_torch_tensor(self.min_values), requires_grad=True))
+         layer.register_parameter(name+"_"+FQ_MAX, nn.Parameter(to_torch_tensor(self.max_values), requires_grad=True))
+
+         # Save the quantizer parameters for later calculations
+         self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
+         self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
+
+
+     def __call__(self,
+                  inputs: nn.Parameter,
+                  training: bool) -> torch.Tensor:
+         """
+         Quantize a tensor
+         Args:
+             inputs: Input tensor to quantize.
+             training: whether in training mode or not
+         Returns:
+             quantized tensor
+         """
+         min_range = self.get_quantizer_variable(FQ_MIN)
+         max_range = self.get_quantizer_variable(FQ_MAX)
+         weight_quantized = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, self.scale_factor)
+         return weight_quantized
+
+     def convert2inferable(self) -> WeightsUniformInferableQuantizer:
+         """
+         Convert quantizer to inferable quantizer.
+
+         Returns:
+             A pytorch inferable quantizer object.
+ """
140
+ min_range = self.get_quantizer_variable(FQ_MIN).cpu().detach().numpy()
141
+ max_range = self.get_quantizer_variable(FQ_MAX).cpu().detach().numpy()
142
+ min_range, max_range = fix_range_to_include_zero(min_range, max_range, self.num_bits)
143
+ return WeightsUniformInferableQuantizer(num_bits=self.num_bits,
144
+ min_range=min_range.tolist(),
145
+ max_range=max_range.tolist(),
146
+ per_channel=self.quantization_config.weights_per_channel_threshold,
147
+ channel_axis=self.quantization_config.weights_channels_axis)
148
+
149
+
150
+ @mark_quantizer(quantization_target=QuantizationTarget.Activation,
151
+ quantization_method=[QuantizationMethod.UNIFORM],
152
+ identifier=TrainingMethod.LSQ)
153
+ class LSQUniformActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
154
+ """
155
+ Trainable constrained quantizer to quantize layer activations.
156
+ """
157
+
158
+ def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
159
+ """
160
+ Initialize a LSQUniformActivationQATQuantizer object with parameters to use
161
+ for uniform quantization.
162
+
163
+ Args:
164
+ quantization_config: trainable quantizer config class
165
+ """
166
+ super().__init__(quantization_config)
167
+ self.num_bits = self.quantization_config.activation_n_bits
168
+ self.min_int = 0
169
+ self.max_int = 2 ** self.num_bits - 1
170
+ self.min_range = np.array([quantization_config.activation_quantization_params[C.RANGE_MIN]])
171
+ self.max_range = np.array([quantization_config.activation_quantization_params[C.RANGE_MAX]])
172
+
173
+ def initialize_quantization(self,
174
+ tensor_shape: torch.Size,
175
+ name: str,
176
+ layer: PytorchQuantizationWrapper):
177
+ """
178
+ Add quantizer parameters to the quantizer parameters dictionary
179
+
180
+ Args:
181
+ tensor_shape: tensor shape of the quantized tensor.
182
+ name: Tensor name.
183
+ layer: Layer to quantize.
184
+ """
185
+ layer.register_parameter(name+"_"+FQ_MIN, nn.Parameter(to_torch_tensor(self.min_range), requires_grad=True))
186
+ layer.register_parameter(name+"_"+FQ_MAX, nn.Parameter(to_torch_tensor(self.max_range), requires_grad=True))
187
+
188
+ # Save the quantizer parameters for later calculations
189
+ self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
190
+ self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
191
+
192
+ def __call__(self,
193
+ inputs: torch.Tensor,
194
+ training: bool = True) -> torch.Tensor:
195
+ """
196
+ Quantize a tensor.
197
+ Args:
198
+ inputs: Input tensor to quantize.
199
+ training: Whether the graph is in training mode.
200
+
201
+ Returns:
202
+ The quantized tensor.
203
+ """
204
+ min_range = self.get_quantizer_variable(FQ_MIN)
205
+ max_range = self.get_quantizer_variable(FQ_MAX)
206
+ n_channels = inputs.shape[1]
207
+ scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
208
+ inputs_quantized = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, scale_factor)
209
+ return inputs_quantized
210
+
211
+ def convert2inferable(self) -> ActivationUniformInferableQuantizer:
212
+ """
213
+ Convert quantizer to inferable quantizer.
214
+
215
+ Returns:
216
+             A pytorch inferable quantizer object.
+ """
218
+ min_range = self.get_quantizer_variable(FQ_MIN).cpu().detach().numpy()
219
+ max_range = self.get_quantizer_variable(FQ_MAX).cpu().detach().numpy()
220
+ min_range, max_range = fix_range_to_include_zero(min_range, max_range, self.num_bits)
221
+ return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
222
+ min_range=min_range.tolist(),
223
+ max_range=max_range.tolist())
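
At its core, the new uniform_lsq_quantizer is straight-through rounding on a uniform grid whose step size receives a scaled gradient (the LSQ trick from the linked paper). The following is a self-contained toy sketch of the same forward computation in plain torch; the 2-bit range and scale factor are hypothetical values chosen for illustration, independent of MCT's wrapper classes:

import torch

def ste_round(x: torch.Tensor) -> torch.Tensor:
    # Round in the forward pass; pass gradients straight through in the backward pass.
    return (torch.round(x) - x).detach() + x

def grad_scale(x: torch.Tensor, scale=1.0) -> torch.Tensor:
    # Identity in the forward pass; gradient is multiplied by `scale` in the backward pass.
    x_scaled = x * scale
    return (x - x_scaled).detach() + x_scaled

# Toy 2-bit quantization over a learned range [a, b] = [-1, 1].
a = torch.tensor(-1.0, requires_grad=True)
b = torch.tensor(1.0, requires_grad=True)
num_bits, min_int, max_int = 2, 0, 2 ** 2 - 1

delta = grad_scale((b - a) / (2 ** num_bits - 1), scale=0.1)
x = torch.linspace(-1.5, 1.5, 7)
q = delta * torch.clip(ste_round((x - a) / delta), min_int, max_int) + a
print(q)  # inputs snapped onto the 4-level grid {-1.0, -0.333, 0.333, 1.0}
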
model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py
@@ -40,9 +40,22 @@ def ste_clip(x: torch.Tensor, min_val=-1.0, max_val=1.0) -> torch.Tensor:
      return (torch.clip(x, min=min_val, max=max_val) - x).detach() + x


- def fix_range_to_include_zero(range_min: torch.Tensor,
-                               range_max: torch.Tensor,
-                               n_bits: int) -> Tuple[torch.Tensor, torch.Tensor]:
+ def grad_scale(x: torch.Tensor, scale=1.0) -> torch.Tensor:
+     """
+     Gradient scale
+     Args:
+         x: input variable
+         scale: scale factor
+     Returns:
+         x in forward and x*scale in backward (for scaling the gradients).
+     """
+     x_scaled = x * scale
+     return (x - x_scaled).detach() + x_scaled
+
+
+ def adjust_range_to_include_zero(range_min: torch.Tensor,
+                                  range_max: torch.Tensor,
+                                  n_bits: int) -> Tuple[torch.Tensor, torch.Tensor]:
      """
      Adjusting the quantization range to include representation of 0.0 in the quantization grid.
      If quantization per-channel, then range_min and range_max should be tensors in the specific shape that allows
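
Since this hunk is where grad_scale lands, a quick standalone check (hypothetical test code, not part of the diff) confirms the behavior its docstring promises: the forward value is untouched while the gradient is scaled:

import torch

def grad_scale(x: torch.Tensor, scale=1.0) -> torch.Tensor:
    # Same detach trick as in the diff: forward returns x,
    # gradients only flow through x * scale.
    x_scaled = x * scale
    return (x - x_scaled).detach() + x_scaled

delta = torch.tensor(0.5, requires_grad=True)
out = grad_scale(delta, scale=0.25)
print(out.item())         # 0.5  -> forward value unchanged
out.backward()
print(delta.grad.item())  # 0.25 -> gradient scaled by the factor
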
@@ -120,7 +133,7 @@ def uniform_quantizer(tensor_data: torch.Tensor,
          Quantized data.
      """
      # adjusts the quantization range so the quantization grid includes zero.
-     a, b = fix_range_to_include_zero(range_min, range_max, n_bits)
+     a, b = adjust_range_to_include_zero(range_min, range_max, n_bits)

      # Compute the step size of quantized values.
      delta_tensor = (b - a) / (2 ** n_bits - 1)
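
The renamed adjust_range_to_include_zero guarantees that 0.0 falls exactly on the quantization grid before the step size is computed, so zeros (e.g. from padding or ReLU) quantize without error. Its body lies outside this hunk; as a rough sketch, one common way to get this behavior (not necessarily MCT's exact implementation, and snap_zero_to_grid is a hypothetical name) is to shift the range so the nearest grid point lands on zero:

import torch

def snap_zero_to_grid(range_min: torch.Tensor,
                      range_max: torch.Tensor,
                      n_bits: int):
    # Step size of the uniform grid spanning [range_min, range_max].
    scale = (range_max - range_min) / (2 ** n_bits - 1)
    # Index of the grid point nearest to zero, clipped into the grid.
    zero_point = torch.clip(torch.round(-range_min / scale), 0, 2 ** n_bits - 1)
    # Shift both ends so that grid point lands exactly on 0.0; the range width is preserved.
    new_min = -zero_point * scale
    new_max = new_min + (2 ** n_bits - 1) * scale
    return new_min, new_max

# Example: with 2 bits, [-0.9, 1.1] has grid {-0.9, -0.233, 0.433, 1.1};
# shifting yields {-0.667, 0.0, 0.667, 1.333}, which contains an exact 0.0.
print(snap_zero_to_grid(torch.tensor(-0.9), torch.tensor(1.1), 2))
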