mct-nightly 1.10.0.20231017.post414__py3-none-any.whl → 1.10.0.20231019.post424__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/METADATA +1 -1
- {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/RECORD +22 -15
- model_compression_toolkit/core/common/framework_implementation.py +0 -12
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +17 -1
- model_compression_toolkit/core/keras/constants.py +7 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +127 -0
- model_compression_toolkit/core/keras/keras_implementation.py +3 -17
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +0 -15
- model_compression_toolkit/qat/common/qat_config.py +4 -1
- model_compression_toolkit/qat/keras/quantizer/__init__.py +2 -0
- model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py +14 -0
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +254 -0
- model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +250 -0
- model_compression_toolkit/qat/keras/quantizer/quant_utils.py +17 -0
- model_compression_toolkit/qat/pytorch/quantizer/__init__.py +3 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/__init__.py +14 -0
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +228 -0
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +223 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +17 -4
- {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/WHEEL +0 -0
- {mct_nightly-1.10.0.20231017.post414.dist-info → mct_nightly-1.10.0.20231019.post424.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
import numpy as np
|
|
16
|
+
import tensorflow as tf
|
|
17
|
+
from tensorflow.python.framework.tensor_shape import TensorShape
|
|
18
|
+
from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX
|
|
19
|
+
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
|
20
|
+
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
|
21
|
+
from model_compression_toolkit.qat import TrainingMethod
|
|
22
|
+
|
|
23
|
+
from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget
|
|
24
|
+
from mct_quantizers.keras.quantizers import \
|
|
25
|
+
BaseKerasInferableQuantizer, WeightsUniformInferableQuantizer, ActivationUniformInferableQuantizer
|
|
26
|
+
|
|
27
|
+
from model_compression_toolkit import constants as C
|
|
28
|
+
|
|
29
|
+
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer
|
|
30
|
+
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \
|
|
31
|
+
TrainableQuantizerActivationConfig
|
|
32
|
+
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
|
33
|
+
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
|
|
34
|
+
from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale, adjust_range_to_include_zero
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def uniform_lsq_quantizer(x: tf.Tensor,
                          min_range: tf.Tensor,
                          max_range: tf.Tensor,
                          num_bits: int,
                          min_int: int,
                          max_int: int,
                          scale_factor: float) -> tf.Tensor:
    """
    Fake-quantize a tensor on a uniform grid according to the LSQ algorithm:
    https://arxiv.org/pdf/1902.08153.pdf
    Args:
        x: input to quantize
        min_range: min range of quantization values
        max_range: max range of quantization values
        num_bits: number of bits for quantization
        min_int: min clipping integer value
        max_int: max clipping integer value
        scale_factor: grad scale of LSQ algorithm
    Returns:
        A quantized tensor
    """
    # The helper returns the (possibly shifted) range actually used for the grid.
    range_lo, range_hi = adjust_range_to_include_zero(min_range, max_range, num_bits)

    # Step size of the grid; grad_scale only affects the gradient of the step.
    n_steps = 2 ** num_bits - 1
    step = grad_scale((range_hi - range_lo) / n_steps, scale_factor)

    # Map to integer levels (rounding with a straight-through gradient), then clip.
    levels = ste_round((x - range_lo) / step)
    levels = tf.math.maximum(levels, min_int)
    levels = tf.math.minimum(levels, max_int)

    # Map the integer levels back to the original value domain.
    return step * levels + range_lo
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
                quantization_method=[QuantizationMethod.UNIFORM],
                identifier=TrainingMethod.LSQ)
class LSQUniformWeightQATQuantizer(BaseKerasQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer's weights.
    The min/max of the uniform range are trainable and are used by
    uniform_lsq_quantizer on every call.
    """

    def __init__(self, quantization_config: TrainableQuantizerWeightsConfig):
        """
        Initialize a LSQUniformWeightQATQuantizer object with parameters to use
        for the quantization.

        Args:
            quantization_config: a trainable quantizer config class with attributes for the quantization.

        """
        super().__init__(quantization_config)
        self.num_bits = self.quantization_config.weights_n_bits
        self.per_channel = self.quantization_config.weights_per_channel_threshold
        self.channel_axis = self.quantization_config.weights_channels_axis
        max_values = np.array(quantization_config.weights_quantization_params[RANGE_MAX])
        min_values = np.array(quantization_config.weights_quantization_params[RANGE_MIN])
        # Original shape of the range parameters; its rank is reported to the inferable quantizer.
        self.min_max_shape = np.asarray(max_values).shape
        # Per-channel: flat vector of ranges. Per-tensor: a plain Python float.
        self.max_values = np.reshape(max_values, [-1]) if self.per_channel else float(max_values)
        self.min_values = np.reshape(min_values, [-1]) if self.per_channel else float(min_values)
        self.min_int = 0
        self.max_int = 2**self.num_bits - 1
        # np.size handles both the per-channel array and the per-tensor float;
        # a Python float has no `.size` attribute, so `self.max_values.size` would raise.
        self.scale_factor = 1.0 / np.sqrt(self.max_int * np.size(self.max_values))

    def initialize_quantization(self,
                                tensor_shape: TensorShape,
                                name: str,
                                layer: KerasTrainableQuantizationWrapper):
        """
        Add quantizer parameters to the quantizer parameters dictionary

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        fq_min = layer.add_weight(
            name + FQ_MIN,
            shape=len(self.min_values) if self.per_channel else (),
            initializer=tf.keras.initializers.Constant(-1.0),
            trainable=True)
        # The constant initializer is only a placeholder; the real start value is assigned here.
        fq_min.assign(self.min_values)

        fq_max = layer.add_weight(
            name + FQ_MAX,
            shape=len(self.max_values) if self.per_channel else (),
            initializer=tf.keras.initializers.Constant(1.0),
            trainable=True)
        fq_max.assign(self.max_values)

        # save the quantizer added parameters for later calculations
        self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS)
        self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS)

    def __call__(self, inputs: tf.Tensor,
                 training: bool):
        """
        Quantize a tensor.
        Args:
            inputs: Input tensor to quantize.
            training: Whether the graph is in training mode.

        Returns:
            The quantized tensor.
        """

        min_range = self.get_quantizer_variable(FQ_MIN)
        max_range = self.get_quantizer_variable(FQ_MAX)
        q_tensor = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, self.scale_factor)
        return q_tensor

    def convert2inferable(self) -> BaseKerasInferableQuantizer:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            BaseKerasInferableQuantizer object.
        """
        # Export the current (trained) range values, passed through fix_range_to_include_zero.
        min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(),
                                                         self.get_quantizer_variable(FQ_MAX).numpy(),
                                                         self.num_bits)
        return WeightsUniformInferableQuantizer(num_bits=self.num_bits,
                                                min_range=list(min_range.flatten()),
                                                max_range=list(max_range.flatten()),
                                                per_channel=self.per_channel,
                                                channel_axis=self.channel_axis,
                                                input_rank=len(self.min_max_shape))
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
                quantization_method=[QuantizationMethod.UNIFORM],
                identifier=TrainingMethod.LSQ)
class LSQUniformActivationQATQuantizer(BaseKerasQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer activations.
    A single trainable min/max pair is used for the whole tensor.
    """

    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
        """
        Initialize a LSQUniformActivationQATQuantizer object with parameters to use
        for the quantization.

        Args:
            quantization_config: trainable quantizer config class
        """
        super().__init__(quantization_config)

        activation_params = quantization_config.activation_quantization_params
        self.num_bits = quantization_config.activation_n_bits
        self.min_range = np.array(activation_params[C.RANGE_MIN])
        self.max_range = np.array(activation_params[C.RANGE_MAX])
        # Integer grid boundaries used when clipping.
        self.min_int = 0
        self.max_int = 2**self.num_bits - 1

    def initialize_quantization(self,
                                tensor_shape: TensorShape,
                                name: str,
                                layer: KerasTrainableQuantizationWrapper):
        """
        Add quantizer parameters to the quantizer parameters dictionary

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        # (variable key, placeholder initializer value, actual start value)
        specs = ((FQ_MIN, -1.0, self.min_range),
                 (FQ_MAX, 1.0, self.max_range))

        created = []
        for key, placeholder, start_value in specs:
            variable = layer.add_weight(
                name + key,
                shape=(),
                initializer=tf.keras.initializers.Constant(placeholder),
                trainable=True)
            variable.assign(start_value)
            created.append((key, variable))

        # save the quantizer added parameters for later calculations
        for key, variable in created:
            self.add_quantizer_variable(key, variable, VariableGroup.QPARAMS)

    def __call__(self,
                 inputs: tf.Tensor,
                 training: bool):
        """
        Quantize a tensor.
        Args:
            inputs: Input tensor to quantize.
            training: Whether the graph is in training mode.

        Returns:
            The quantized tensor.
        """
        range_lo = self.get_quantizer_variable(FQ_MIN)
        range_hi = self.get_quantizer_variable(FQ_MAX)
        # The LSQ gradient scale is derived from the size of the last input axis.
        last_axis_size = inputs.shape[-1]
        lsq_scale = 1.0 / np.sqrt(self.max_int * last_axis_size)
        return uniform_lsq_quantizer(inputs, range_lo, range_hi, self.num_bits,
                                     self.min_int, self.max_int, lsq_scale)

    def convert2inferable(self) -> BaseKerasInferableQuantizer:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            BaseKerasInferableQuantizer object.
        """
        trained_min = self.get_quantizer_variable(FQ_MIN).numpy()
        trained_max = self.get_quantizer_variable(FQ_MAX).numpy()
        min_range, max_range = fix_range_to_include_zero(trained_min, trained_max, self.num_bits)
        # In activation quantization is per-tensor only - thus we pass
        # the min/max as lists with a len of 1
        return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
                                                   min_range=[min_range],
                                                   max_range=[max_range])
|
|
@@ -17,6 +17,23 @@ import tensorflow as tf
|
|
|
17
17
|
from typing import Tuple
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def ste_round(x: tf.Tensor) -> tf.Tensor:
    """
    Return the rounded values of a tensor.
    The rounding residual is wrapped in tf.stop_gradient, so gradients
    flow through the `x` term only.
    """
    rounding_residual = tf.math.round(x) - x
    return tf.stop_gradient(rounding_residual) + x
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def grad_scale(x: tf.Tensor, scale=1.0) -> tf.Tensor:
    """
    Return x in forward and x*scale in backward (for scaling the gradients).
    """
    scaled = scale * x
    # The difference is excluded from the gradient, so only `scaled` contributes to it.
    return tf.stop_gradient(x - scaled) + scaled
|
|
35
|
+
|
|
36
|
+
|
|
20
37
|
def adjust_range_to_include_zero(range_min: tf.Tensor,
|
|
21
38
|
range_max: tf.Tensor,
|
|
22
39
|
n_bits: int) -> Tuple[tf.Tensor, tf.Tensor]:
|
|
@@ -14,4 +14,6 @@
|
|
|
14
14
|
# ==============================================================================
|
|
15
15
|
|
|
16
16
|
import model_compression_toolkit.qat.pytorch.quantizer.ste_rounding.symmetric_ste
|
|
17
|
-
import model_compression_toolkit.qat.pytorch.quantizer.ste_rounding.uniform_ste
|
|
17
|
+
import model_compression_toolkit.qat.pytorch.quantizer.ste_rounding.uniform_ste
|
|
18
|
+
import model_compression_toolkit.qat.pytorch.quantizer.lsq.symmetric_lsq
|
|
19
|
+
import model_compression_toolkit.qat.pytorch.quantizer.lsq.uniform_lsq
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
from typing import Union
|
|
16
|
+
|
|
17
|
+
import numpy as np
|
|
18
|
+
import torch
|
|
19
|
+
import torch.nn as nn
|
|
20
|
+
|
|
21
|
+
from model_compression_toolkit.qat import TrainingMethod
|
|
22
|
+
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
|
23
|
+
from mct_quantizers import PytorchQuantizationWrapper
|
|
24
|
+
from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
|
|
25
|
+
from model_compression_toolkit import constants as C
|
|
26
|
+
from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_quantizer import BasePytorchQATTrainableQuantizer
|
|
27
|
+
from mct_quantizers.common.base_inferable_quantizer import mark_quantizer, QuantizationTarget
|
|
28
|
+
|
|
29
|
+
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
|
|
30
|
+
from model_compression_toolkit.qat.pytorch.quantizer.quantizer_utils import ste_round, grad_scale
|
|
31
|
+
from mct_quantizers.pytorch.quantizers import \
|
|
32
|
+
WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, ActivationPOTInferableQuantizer, \
|
|
33
|
+
ActivationSymmetricInferableQuantizer
|
|
34
|
+
from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
|
|
35
|
+
TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
|
|
36
|
+
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def symmetric_lsq_quantizer(x: nn.Parameter,
                            thresholds: nn.Parameter,
                            num_bits: int,
                            sign: bool,
                            min_int: int,
                            max_int: int,
                            scale_factor: float) -> Union[nn.Parameter, torch.Tensor]:
    """
    Fake-quantize a tensor symmetrically according to the LSQ algorithm:
    https://arxiv.org/pdf/1902.08153.pdf
    Args:
        x: input to quantize
        thresholds: thresholds of quantization levels
        num_bits: number of bits for quantization
        sign: whether x is signed or not
        min_int: min clipping integer value
        max_int: max clipping integer value
        scale_factor: grad scale of LSQ algorithm
    Returns:
        A quantized tensor
    """
    # One bit is reserved for the sign when the data is signed.
    n_magnitude_bits = num_bits - int(sign)
    # Step size of the grid; grad_scale only affects the gradient of the step.
    step = grad_scale(thresholds / (2 ** n_magnitude_bits), scale_factor)
    # Integer levels (rounding with a straight-through gradient), clipped to the grid.
    levels = torch.clip(ste_round(x / step), min=min_int, max=max_int)
    return step * levels
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
                identifier=TrainingMethod.LSQ)
class LSQWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer's weights.
    The thresholds are trainable and are used by symmetric_lsq_quantizer on every call.
    """

    def __init__(self, quantization_config: TrainableQuantizerWeightsConfig):
        """
        Initialize a LSQWeightQATQuantizer object with parameters to use
        for the quantization.

        Args:
            quantization_config: trainable quantizer config class
        """
        super().__init__(quantization_config)
        self.power_of_two = quantization_config.weights_quantization_method == QuantizationMethod.POWER_OF_TWO
        self.threshold_values = np.array(quantization_config.weights_quantization_params[C.THRESHOLD])
        if self.power_of_two:
            # Round the initial thresholds up to a power of two (floored at C.MIN_THRESHOLD).
            self.threshold_values = np.power(2.0, np.ceil(np.log2(np.maximum(self.threshold_values, C.MIN_THRESHOLD))))
        self.num_bits = self.quantization_config.weights_n_bits
        n_pos_bits = self.num_bits - int(C.WEIGHTS_SIGNED)
        self.min_int = -int(C.WEIGHTS_SIGNED) * (2 ** n_pos_bits)
        self.max_int = 2 ** n_pos_bits - 1
        self.scale_factor = 1.0 / np.sqrt(self.max_int * self.threshold_values.size)

    def initialize_quantization(self,
                                tensor_shape: torch.Size,
                                name: str,
                                layer: PytorchQuantizationWrapper):
        """
        Add quantizer parameters to the quantizer parameters dictionary

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """

        # Add threshold variables to layer.
        layer.register_parameter(name + "_" + THRESHOLD_TENSOR, nn.Parameter(to_torch_tensor(self.threshold_values), requires_grad=True))

        # save the quantizer added parameters for later calculations
        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name + "_" + THRESHOLD_TENSOR), VariableGroup.QPARAMS)

    def __call__(self,
                 inputs: nn.Parameter,
                 training: bool) -> nn.Parameter:
        """
        Quantize a tensor
        Args:
            inputs: Input tensor to quantize.
            training: whether in training mode or not
        Returns:
            quantized tensor
        """
        thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
        weight_quantized = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, C.WEIGHTS_SIGNED, self.min_int, self.max_int, self.scale_factor)
        return weight_quantized

    def convert2inferable(self) -> Union[WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer]:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            A pytorch inferable quantizer object.
        """
        threshold_values = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy().flatten()
        if self.power_of_two:
            # The thresholds are trained freely, so they may have reached zero or below;
            # floor them (same floor as in __init__) so that log2 stays finite.
            safe_thresholds = np.maximum(threshold_values, C.MIN_THRESHOLD)
            pot_threshold = 2 ** np.ceil(np.log2(safe_thresholds))
            return WeightsPOTInferableQuantizer(num_bits=self.num_bits,
                                                threshold=pot_threshold.tolist(),
                                                per_channel=self.quantization_config.weights_per_channel_threshold,
                                                channel_axis=self.quantization_config.weights_channels_axis)
        else:
            return WeightsSymmetricInferableQuantizer(num_bits=self.num_bits,
                                                      threshold=threshold_values.tolist(),
                                                      per_channel=self.quantization_config.weights_per_channel_threshold,
                                                      channel_axis=self.quantization_config.weights_channels_axis)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
                identifier=TrainingMethod.LSQ)
class LSQActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
    """
    Trainable constrained quantizer to quantize layer activations.
    A single trainable threshold is used by symmetric_lsq_quantizer on every call.
    """

    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
        """
        Initialize a LSQActivationQATQuantizer object with parameters to use
        for symmetric or power of two quantization.

        Args:
            quantization_config: trainable quantizer config class
        """
        super().__init__(quantization_config)
        self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
        self.sign = quantization_config.activation_quantization_params['is_signed']
        # Wrapped in a list so the threshold is stored as a one-element array.
        self.threshold_values = np.array([quantization_config.activation_quantization_params[C.THRESHOLD]])
        self.num_bits = quantization_config.activation_n_bits
        # One bit is reserved for the sign when the activation is signed.
        n_pos_bits = self.num_bits - int(self.sign)
        self.min_int = -int(self.sign) * (2 ** n_pos_bits)
        self.max_int = (2 ** n_pos_bits) - 1

    def initialize_quantization(self,
                                tensor_shape: torch.Size,
                                name: str,
                                layer: PytorchQuantizationWrapper):
        """
        Add quantizer parameters to the quantizer parameters dictionary

        Args:
            tensor_shape: tensor shape of the quantized tensor.
            name: Tensor name.
            layer: Layer to quantize.
        """
        layer.register_parameter(name, nn.Parameter(to_torch_tensor(self.threshold_values), requires_grad=True))

        # save the quantizer added parameters for later calculations
        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name), VariableGroup.QPARAMS)

    def __call__(self,
                 inputs: torch.Tensor,
                 training: bool = True) -> torch.Tensor:
        """
        Quantize a tensor.
        Args:
            inputs: Input tensor to quantize.
            training: Whether the graph is in training mode.

        Returns:
            The quantized tensor.
        """

        thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
        # The LSQ gradient scale is derived from the size of input axis 1.
        n_channels = inputs.shape[1]
        scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
        inputs_quantized = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, self.sign, self.min_int, self.max_int, scale_factor)
        return inputs_quantized

    def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
        """
        Convert quantizer to inferable quantizer.

        Returns:
            A pytorch inferable quantizer object.
        """
        threshold_values = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy()
        if self.power_of_two:
            # The threshold is trained freely, so it may have reached zero or below;
            # floor it at C.MIN_THRESHOLD so that log2 stays finite.
            safe_thresholds = np.maximum(threshold_values, C.MIN_THRESHOLD)
            pot_threshold = np.power(2.0, np.ceil(np.log2(safe_thresholds)))
            return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
                                                   threshold=pot_threshold.tolist(),
                                                   signed=self.sign)
        else:
            return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
                                                         threshold=threshold_values.tolist(),
                                                         signed=self.sign)
|