mct-nightly 2.2.0.20241011.452__py3-none-any.whl → 2.2.0.20241017.455__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241011.452.dist-info → mct_nightly-2.2.0.20241017.455.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241011.452.dist-info → mct_nightly-2.2.0.20241017.455.dist-info}/RECORD +22 -14
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/{base_keras_qat_quantizer.py → base_keras_qat_weight_quantizer.py} +3 -13
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +5 -126
- model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +4 -121
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +7 -6
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +4 -119
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +5 -95
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py +20 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py +22 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +127 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py +129 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py +148 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py +122 -0
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +12 -10
- model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py +60 -0
- {mct_nightly-2.2.0.20241011.452.dist-info → mct_nightly-2.2.0.20241017.455.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241011.452.dist-info → mct_nightly-2.2.0.20241017.455.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241011.452.dist-info → mct_nightly-2.2.0.20241017.455.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20241011.452.dist-info → mct_nightly-2.2.0.20241017.455.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
model_compression_toolkit/__init__.py,sha256=Pw5laVBBrwaRaU8RB7HsipMx-1faRX33B0Lfa6YX2kA,1573
|
2
2
|
model_compression_toolkit/constants.py,sha256=i4wYheBkIdQmsQA-axIpcT3YiSO1USNc-jaNiNE8w6E,3920
|
3
3
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
4
4
|
model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
|
@@ -402,15 +402,15 @@ model_compression_toolkit/qat/common/qat_config.py,sha256=xtfVSoyELGXynHNrw86dB9
|
|
402
402
|
model_compression_toolkit/qat/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
403
403
|
model_compression_toolkit/qat/keras/quantization_facade.py,sha256=LNM2HW4cNei3tUhwLdNtsWrox_uSAhaswFxWiMEIrPM,17278
|
404
404
|
model_compression_toolkit/qat/keras/quantizer/__init__.py,sha256=zmYyCa25_KLCSUCGUDRslh3RCIjcRMxc_oXa54Aui-4,996
|
405
|
-
model_compression_toolkit/qat/keras/quantizer/
|
405
|
+
model_compression_toolkit/qat/keras/quantizer/base_keras_qat_weight_quantizer.py,sha256=EbIt4lMlh6cU4awFLMBp0IlZ2zUUp-WtnlW5Wn19FDM,1793
|
406
406
|
model_compression_toolkit/qat/keras/quantizer/quant_utils.py,sha256=cBULOgWUodcBO1lHevZggdTevuDYI6tQceV86U2x6DA,2543
|
407
|
-
model_compression_toolkit/qat/keras/quantizer/quantization_builder.py,sha256=
|
407
|
+
model_compression_toolkit/qat/keras/quantizer/quantization_builder.py,sha256=hGizGBbOGZpD-w3wg-LlehUYJDWLk91VUdfVwwG2Z78,5882
|
408
408
|
model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
|
409
|
-
model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=
|
410
|
-
model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py,sha256=
|
409
|
+
model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=dtkS0mpjvJntAxpOi-BJx-pCeBF2ReKKeH7y2uwzpH0,6756
|
410
|
+
model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py,sha256=vGUs9b0IHTydCA5tN7iekuhf1LHNgIrSF5sXMD1WsSI,6476
|
411
411
|
model_compression_toolkit/qat/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
412
|
-
model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=
|
413
|
-
model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py,sha256=
|
412
|
+
model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=pFkrjtlavCniswcO3-Djlh6a_Hz1rrcEa7Z5wTGVRCU,8270
|
413
|
+
model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py,sha256=ZdZwMwLa1Ws2eo3DiQYYTvPS1JfiswZL1xlQPtRnIgE,7067
|
414
414
|
model_compression_toolkit/qat/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
415
415
|
model_compression_toolkit/qat/pytorch/quantization_facade.py,sha256=NnFy2E_7SR2m8vfh8Q8VrXOXhe7rMScgXnYBtDpsqVs,13456
|
416
416
|
model_compression_toolkit/qat/pytorch/quantizer/__init__.py,sha256=xYa4C8pr9cG1f3mQQcBXO_u3IdJN-zl7leZxuXDs86w,1003
|
@@ -500,11 +500,19 @@ model_compression_toolkit/trainable_infrastructure/common/quant_utils.py,sha256=
|
|
500
500
|
model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py,sha256=My5Wz34jPOyh8z33OTpKnOobRB0cpO_Qgmtsd5lizHo,4791
|
501
501
|
model_compression_toolkit/trainable_infrastructure/common/training_method.py,sha256=LUoeJkloowhZKuHTiOfzjmSUn2G-4of11-rbnL-h0P4,1194
|
502
502
|
model_compression_toolkit/trainable_infrastructure/keras/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
503
|
-
model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py,sha256=
|
503
|
+
model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py,sha256=LBc26z8pkpbcdKMTxpNBg5IyChLreHQ1lRgCVjNE37o,4202
|
504
504
|
model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py,sha256=txdWXdZoHazg-3MDPb9P-oXRM92LRn2G_8woEplwKaI,4360
|
505
505
|
model_compression_toolkit/trainable_infrastructure/keras/load_model.py,sha256=DJHibcLo-UCuHV6UPLeVd7dKmPfkGXEiLqCCqvQrISM,3769
|
506
506
|
model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha256=eVB5FSE3OmTLrhfLUcP2knwN1z2_unQLM-xFEGwdafA,5587
|
507
|
-
model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=
|
507
|
+
model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=r3CaPd4pyM1GDXU2--9NT3wwvl9H6y3QUrVT9spx5es,4189
|
508
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py,sha256=QPBRTl_9ZXF-Yk5srotlKVOmxKTXMm5xf2-9IjIrBAI,1055
|
509
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py,sha256=VvwsrOVZgWed82P9rtu_UDDD99MnZSppPsjrCtxk2AY,964
|
510
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
|
511
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py,sha256=iBO2hAwJ1HUQzsis_kEGE-BPB3hOW7IF7p2uFLWg09A,6259
|
512
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py,sha256=2BOQXymCZUSLdxDbaS8Blr2FB-NxQV01punWNjMGiNc,5765
|
513
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
|
514
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py,sha256=THY5eZ_69D1yzkXLhLg84ON_deNUAD_qMJ6A5C5znDM,7359
|
515
|
+
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py,sha256=XEypYorBnSBLj6sh1pHCNaSjeCToYVlERWIHxUoXvuc,5733
|
508
516
|
model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
509
517
|
model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py,sha256=IdUBpZUcOXHLPp2OhwbO_Kytee3OTVuy2032N-tm694,1686
|
510
518
|
model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=lWc5EG3ptrP85n69EHGKFkIadnrKEBMKnB5YXQ5AmXo,2745
|
@@ -551,8 +559,8 @@ tests_pytest/pytorch/gptq/test_annealing_cfg.py,sha256=hGC7L6mp3N1ygcJ3OctgS_Fz2
|
|
551
559
|
tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=tI01aFIUaiCILL5Qn--p1E_rLBUelxLdSY3k52lwcx0,4594
|
552
560
|
tests_pytest/pytorch/trainable_infrastructure/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
|
553
561
|
tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=eNOpSp0GoLxtEdiRypBp8jaujXfdNxBwKh5Rd-P7WLs,1786
|
554
|
-
mct_nightly-2.2.0.
|
555
|
-
mct_nightly-2.2.0.
|
556
|
-
mct_nightly-2.2.0.
|
557
|
-
mct_nightly-2.2.0.
|
558
|
-
mct_nightly-2.2.0.
|
562
|
+
mct_nightly-2.2.0.20241017.455.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
563
|
+
mct_nightly-2.2.0.20241017.455.dist-info/METADATA,sha256=OkKILNYbh3SMCjpEVFD8rXhbjfcGeksV0H7PG-ug_d4,20830
|
564
|
+
mct_nightly-2.2.0.20241017.455.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
565
|
+
mct_nightly-2.2.0.20241017.455.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
|
566
|
+
mct_nightly-2.2.0.20241017.455.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.2.0.
|
30
|
+
__version__ = "2.2.0.20241017.000455"
|
@@ -22,24 +22,14 @@ from model_compression_toolkit.trainable_infrastructure import TrainableQuantize
|
|
22
22
|
|
23
23
|
if FOUND_TF:
|
24
24
|
|
25
|
-
class
|
25
|
+
class BaseKerasQATWeightTrainableQuantizer(BaseKerasTrainableQuantizer):
|
26
26
|
"""
|
27
27
|
A base class for trainable Keras quantizer for QAT.
|
28
28
|
"""
|
29
|
-
|
30
|
-
def __init__(self,
|
31
|
-
quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
|
32
|
-
"""
|
33
|
-
Initializes BaseKerasQATTrainableQuantizer object.
|
34
|
-
|
35
|
-
Args:
|
36
|
-
quantization_config: quantizer config class contains all the information about a quantizer configuration.
|
37
|
-
"""
|
38
|
-
|
39
|
-
super().__init__(quantization_config)
|
29
|
+
pass
|
40
30
|
|
41
31
|
else: # pragma: no cover
|
42
|
-
class
|
32
|
+
class BaseKerasQATWeightTrainableQuantizer(BaseKerasTrainableQuantizer):
|
43
33
|
def __init__(self,
|
44
34
|
quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
|
45
35
|
|
@@ -28,47 +28,18 @@ from mct_quantizers import QuantizationTarget, mark_quantizer
|
|
28
28
|
from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
|
29
29
|
from model_compression_toolkit import constants as C
|
30
30
|
|
31
|
-
from model_compression_toolkit.qat.keras.quantizer.
|
32
|
-
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig,
|
33
|
-
|
34
|
-
from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, \
|
35
|
-
ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer
|
31
|
+
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import BaseKerasQATWeightTrainableQuantizer
|
32
|
+
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
|
33
|
+
from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer
|
36
34
|
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
37
35
|
from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale
|
38
|
-
|
39
|
-
|
40
|
-
def symmetric_lsq_quantizer(x: tf.Tensor,
|
41
|
-
thresholds: tf.Tensor,
|
42
|
-
num_bits: int,
|
43
|
-
sign: bool,
|
44
|
-
min_int: int,
|
45
|
-
max_int:int,
|
46
|
-
scale_factor: float) -> tf.Tensor:
|
47
|
-
"""
|
48
|
-
Symmetric quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
|
49
|
-
Args:
|
50
|
-
x: input to quantize
|
51
|
-
thresholds: thresholds of quantization levels
|
52
|
-
num_bits: number of bits for quantization
|
53
|
-
sign: whether x is signed or not
|
54
|
-
min_int: min clipping integer value
|
55
|
-
max_int: max clipping integer value
|
56
|
-
scale_factor: grad scale of LSQ algorithm
|
57
|
-
Returns:
|
58
|
-
A quantized tensor
|
59
|
-
"""
|
60
|
-
delta = thresholds / (2 ** (num_bits - int(sign)))
|
61
|
-
delta_scaled = grad_scale(delta, scale_factor)
|
62
|
-
rounded = ste_round(x / delta_scaled)
|
63
|
-
clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int)
|
64
|
-
quantized = delta_scaled * clipped
|
65
|
-
return quantized
|
36
|
+
from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import symmetric_lsq_quantizer
|
66
37
|
|
67
38
|
|
68
39
|
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
|
69
40
|
quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
|
70
41
|
identifier=TrainingMethod.LSQ)
|
71
|
-
class LSQWeightQATQuantizer(
|
42
|
+
class LSQWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer):
|
72
43
|
"""
|
73
44
|
Trainable constrained quantizer to quantize layer's weights.
|
74
45
|
"""
|
@@ -159,95 +130,3 @@ class LSQWeightQATQuantizer(BaseKerasQATTrainableQuantizer):
|
|
159
130
|
input_rank=len(self.threshold_shape))
|
160
131
|
|
161
132
|
|
162
|
-
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
|
163
|
-
quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
|
164
|
-
identifier=TrainingMethod.LSQ)
|
165
|
-
class LSQActivationQATQuantizer(BaseKerasQATTrainableQuantizer):
|
166
|
-
"""
|
167
|
-
Trainable constrained quantizer to quantize layer activations.
|
168
|
-
"""
|
169
|
-
|
170
|
-
def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
|
171
|
-
"""
|
172
|
-
Initialize a LSQActivationQATQuantizer object with parameters to use
|
173
|
-
for the quantization.
|
174
|
-
|
175
|
-
Args:
|
176
|
-
quantization_config: trainable quantizer config class
|
177
|
-
"""
|
178
|
-
super().__init__(quantization_config)
|
179
|
-
self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
|
180
|
-
self.threshold_values = float(quantization_config.activation_quantization_params[C.THRESHOLD])
|
181
|
-
self.threshold_shape = np.asarray(self.threshold_values).shape
|
182
|
-
self.sign = quantization_config.activation_quantization_params[SIGNED]
|
183
|
-
self.num_bits = quantization_config.activation_n_bits
|
184
|
-
n_pos_bits = self.num_bits - int(self.sign)
|
185
|
-
self.min_int = -int(self.sign) * (2 ** n_pos_bits)
|
186
|
-
self.max_int = (2 ** n_pos_bits) - 1
|
187
|
-
if self.power_of_two:
|
188
|
-
self.threshold_values = np.power(2.0, np.ceil(np.log2(np.maximum(self.threshold_values, C.MIN_THRESHOLD))))
|
189
|
-
|
190
|
-
|
191
|
-
def initialize_quantization(self,
|
192
|
-
tensor_shape: TensorShape,
|
193
|
-
name: str,
|
194
|
-
layer: KerasTrainableQuantizationWrapper):
|
195
|
-
"""
|
196
|
-
Add quantizer parameters to the quantizer parameters dictionary
|
197
|
-
|
198
|
-
Args:
|
199
|
-
tensor_shape: tensor shape of the quantized tensor.
|
200
|
-
name: Tensor name.
|
201
|
-
layer: Layer to quantize.
|
202
|
-
"""
|
203
|
-
ptq_threshold_tensor = layer.add_weight(
|
204
|
-
name + THRESHOLD_TENSOR,
|
205
|
-
shape=(),
|
206
|
-
initializer=tf.keras.initializers.Constant(1.0),
|
207
|
-
trainable=True)
|
208
|
-
ptq_threshold_tensor.assign(self.threshold_values)
|
209
|
-
|
210
|
-
# save the quantizer added parameters for later calculations
|
211
|
-
self.add_quantizer_variable(THRESHOLD_TENSOR, ptq_threshold_tensor, VariableGroup.QPARAMS)
|
212
|
-
|
213
|
-
def __call__(self,
|
214
|
-
inputs: tf.Tensor,
|
215
|
-
training: bool):
|
216
|
-
"""
|
217
|
-
Quantize a tensor.
|
218
|
-
Args:
|
219
|
-
inputs: Input tensor to quantize.
|
220
|
-
training: Whether the graph is in training mode.
|
221
|
-
|
222
|
-
Returns:
|
223
|
-
The quantized tensor.
|
224
|
-
"""
|
225
|
-
|
226
|
-
thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
|
227
|
-
n_channels = inputs.shape[-1]
|
228
|
-
scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
|
229
|
-
q_tensor = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, self.sign, self.min_int, self.max_int, scale_factor)
|
230
|
-
return q_tensor
|
231
|
-
|
232
|
-
def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
|
233
|
-
"""
|
234
|
-
Convert quantizer to inferable quantizer.
|
235
|
-
|
236
|
-
Returns:
|
237
|
-
BaseKerasInferableQuantizer object.
|
238
|
-
"""
|
239
|
-
|
240
|
-
if self.power_of_two:
|
241
|
-
thresholds = 2 ** np.ceil(np.log2(self.get_quantizer_variable(THRESHOLD_TENSOR).numpy()))
|
242
|
-
return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
|
243
|
-
# In activation quantization is per-tensor only - thus we pass
|
244
|
-
# the threshold as a list with a len of 1
|
245
|
-
threshold=[thresholds],
|
246
|
-
signed=self.sign)
|
247
|
-
else:
|
248
|
-
thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR).numpy()
|
249
|
-
return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
|
250
|
-
# In activation quantization is per-tensor only - thus we
|
251
|
-
# pass the threshold as a list with a len of 1
|
252
|
-
threshold=[thresholds],
|
253
|
-
signed=self.sign)
|
@@ -16,6 +16,8 @@ import numpy as np
|
|
16
16
|
import tensorflow as tf
|
17
17
|
from tensorflow.python.framework.tensor_shape import TensorShape
|
18
18
|
from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX
|
19
|
+
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import \
|
20
|
+
BaseKerasQATWeightTrainableQuantizer
|
19
21
|
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
20
22
|
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
21
23
|
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
@@ -26,47 +28,18 @@ from mct_quantizers.keras.quantizers import \
|
|
26
28
|
|
27
29
|
from model_compression_toolkit import constants as C
|
28
30
|
|
29
|
-
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer
|
30
31
|
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, \
|
31
32
|
TrainableQuantizerActivationConfig
|
32
33
|
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
33
34
|
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
|
34
35
|
from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale, adjust_range_to_include_zero
|
35
|
-
|
36
|
-
|
37
|
-
def uniform_lsq_quantizer(x: tf.Tensor,
|
38
|
-
min_range: tf.Tensor,
|
39
|
-
max_range: tf.Tensor,
|
40
|
-
num_bits: int,
|
41
|
-
min_int: int,
|
42
|
-
max_int:int,
|
43
|
-
scale_factor: float) -> tf.Tensor:
|
44
|
-
"""
|
45
|
-
Uniform quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
|
46
|
-
Args:
|
47
|
-
x: input to quantize
|
48
|
-
min_range: min range of quantization values
|
49
|
-
max_range: min range of quantization values
|
50
|
-
num_bits: number of bits for quantization
|
51
|
-
min_int: min clipping integer value
|
52
|
-
max_int: max clipping integer value
|
53
|
-
scale_factor: grad scale of LSQ algorithm
|
54
|
-
Returns:
|
55
|
-
A quantized tensor
|
56
|
-
"""
|
57
|
-
min_range, max_range = adjust_range_to_include_zero(min_range, max_range, num_bits)
|
58
|
-
delta = (max_range - min_range) / (2 ** num_bits - 1)
|
59
|
-
delta_scaled = grad_scale(delta, scale_factor)
|
60
|
-
rounded = ste_round((x-min_range) / delta_scaled)
|
61
|
-
clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int)
|
62
|
-
quantized = delta_scaled * clipped + min_range
|
63
|
-
return quantized
|
36
|
+
from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import uniform_lsq_quantizer
|
64
37
|
|
65
38
|
|
66
39
|
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
|
67
40
|
quantization_method=[QuantizationMethod.UNIFORM],
|
68
41
|
identifier=TrainingMethod.LSQ)
|
69
|
-
class LSQUniformWeightQATQuantizer(
|
42
|
+
class LSQUniformWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer):
|
70
43
|
"""
|
71
44
|
Trainable constrained quantizer to quantize layer's weights.
|
72
45
|
"""
|
@@ -158,93 +131,3 @@ class LSQUniformWeightQATQuantizer(BaseKerasQATTrainableQuantizer):
|
|
158
131
|
channel_axis=self.channel_axis,
|
159
132
|
input_rank=len(self.min_max_shape))
|
160
133
|
|
161
|
-
|
162
|
-
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
|
163
|
-
quantization_method=[QuantizationMethod.UNIFORM],
|
164
|
-
identifier=TrainingMethod.LSQ)
|
165
|
-
class LSQUniformActivationQATQuantizer(BaseKerasQATTrainableQuantizer):
|
166
|
-
"""
|
167
|
-
Trainable constrained quantizer to quantize layer activations.
|
168
|
-
"""
|
169
|
-
|
170
|
-
def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
|
171
|
-
"""
|
172
|
-
Initialize a LSQUniformActivationQATQuantizer object with parameters to use
|
173
|
-
for the quantization.
|
174
|
-
|
175
|
-
Args:
|
176
|
-
quantization_config: trainable quantizer config class
|
177
|
-
"""
|
178
|
-
super().__init__(quantization_config)
|
179
|
-
|
180
|
-
self.num_bits = quantization_config.activation_n_bits
|
181
|
-
self.min_range = np.array(quantization_config.activation_quantization_params[C.RANGE_MIN])
|
182
|
-
self.max_range = np.array(quantization_config.activation_quantization_params[C.RANGE_MAX])
|
183
|
-
self.min_int = 0
|
184
|
-
self.max_int = 2**self.num_bits - 1
|
185
|
-
|
186
|
-
def initialize_quantization(self,
|
187
|
-
tensor_shape: TensorShape,
|
188
|
-
name: str,
|
189
|
-
layer: KerasTrainableQuantizationWrapper):
|
190
|
-
"""
|
191
|
-
Add quantizer parameters to the quantizer parameters dictionary
|
192
|
-
|
193
|
-
Args:
|
194
|
-
tensor_shape: tensor shape of the quantized tensor.
|
195
|
-
name: Tensor name.
|
196
|
-
layer: Layer to quantize.
|
197
|
-
"""
|
198
|
-
fq_min = layer.add_weight(
|
199
|
-
name + FQ_MIN,
|
200
|
-
shape=(),
|
201
|
-
initializer=tf.keras.initializers.Constant(-1.0),
|
202
|
-
trainable=True)
|
203
|
-
fq_min.assign(self.min_range)
|
204
|
-
|
205
|
-
fq_max = layer.add_weight(
|
206
|
-
name + FQ_MAX,
|
207
|
-
shape=(),
|
208
|
-
initializer=tf.keras.initializers.Constant(1.0),
|
209
|
-
trainable=True)
|
210
|
-
fq_max.assign(self.max_range)
|
211
|
-
|
212
|
-
# save the quantizer added parameters for later calculations
|
213
|
-
self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS)
|
214
|
-
self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS)
|
215
|
-
|
216
|
-
def __call__(self,
|
217
|
-
inputs: tf.Tensor,
|
218
|
-
training: bool):
|
219
|
-
"""
|
220
|
-
Quantize a tensor.
|
221
|
-
Args:
|
222
|
-
inputs: Input tensor to quantize.
|
223
|
-
training: Whether the graph is in training mode.
|
224
|
-
|
225
|
-
Returns:
|
226
|
-
The quantized tensor.
|
227
|
-
"""
|
228
|
-
|
229
|
-
min_range = self.get_quantizer_variable(FQ_MIN)
|
230
|
-
max_range = self.get_quantizer_variable(FQ_MAX)
|
231
|
-
n_channels = inputs.shape[-1]
|
232
|
-
scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
|
233
|
-
q_tensor = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, scale_factor)
|
234
|
-
return q_tensor
|
235
|
-
|
236
|
-
def convert2inferable(self) -> BaseKerasInferableQuantizer:
|
237
|
-
"""
|
238
|
-
Convert quantizer to inferable quantizer.
|
239
|
-
|
240
|
-
Returns:
|
241
|
-
BaseKerasInferableQuantizer object.
|
242
|
-
"""
|
243
|
-
min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(),
|
244
|
-
self.get_quantizer_variable(FQ_MAX).numpy(),
|
245
|
-
self.num_bits)
|
246
|
-
return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
|
247
|
-
# In activation quantization is per-tensor only - thus we pass
|
248
|
-
# the min/max as lists with a len of 1
|
249
|
-
min_range=[min_range],
|
250
|
-
max_range=[max_range])
|
@@ -15,17 +15,18 @@
|
|
15
15
|
from typing import Tuple, Dict, List, Callable
|
16
16
|
|
17
17
|
from model_compression_toolkit.core import common
|
18
|
-
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
|
19
|
-
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
|
20
18
|
from model_compression_toolkit.logger import Logger
|
21
19
|
from model_compression_toolkit.qat.common.qat_config import QATConfig
|
22
|
-
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer
|
23
20
|
from mct_quantizers import QuantizationTarget, KerasActivationQuantizationHolder
|
21
|
+
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import \
|
22
|
+
BaseKerasQATWeightTrainableQuantizer
|
24
23
|
from model_compression_toolkit.trainable_infrastructure.common.get_quantizer_config import \
|
25
24
|
get_trainable_quantizer_weights_config, get_trainable_quantizer_activation_config, \
|
26
25
|
get_trainable_quantizer_quantization_candidates
|
27
26
|
from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import \
|
28
27
|
get_trainable_quantizer_class
|
28
|
+
from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import \
|
29
|
+
BaseKerasActivationTrainableQuantizer
|
29
30
|
|
30
31
|
|
31
32
|
def get_activation_quantizer_holder(n: common.BaseNode,
|
@@ -55,7 +56,7 @@ def get_activation_quantizer_holder(n: common.BaseNode,
|
|
55
56
|
def quantization_builder(n: common.BaseNode,
|
56
57
|
qat_config: QATConfig,
|
57
58
|
kernel_attr: str = None,
|
58
|
-
) -> Tuple[Dict[str,
|
59
|
+
) -> Tuple[Dict[str, BaseKerasQATWeightTrainableQuantizer], List[BaseKerasActivationTrainableQuantizer]]:
|
59
60
|
"""
|
60
61
|
Build quantizers for a node according to its quantization configuration.
|
61
62
|
|
@@ -82,7 +83,7 @@ def quantization_builder(n: common.BaseNode,
|
|
82
83
|
quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Weights,
|
83
84
|
qat_config.weight_training_method,
|
84
85
|
quant_method,
|
85
|
-
|
86
|
+
BaseKerasQATWeightTrainableQuantizer)
|
86
87
|
|
87
88
|
weight_quantizers.update({kernel_attr: quantizer_class(get_trainable_quantizer_weights_config(n,
|
88
89
|
attr_name=kernel_attr,
|
@@ -98,7 +99,7 @@ def quantization_builder(n: common.BaseNode,
|
|
98
99
|
quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Activation,
|
99
100
|
qat_config.activation_training_method,
|
100
101
|
quant_method,
|
101
|
-
|
102
|
+
BaseKerasActivationTrainableQuantizer)
|
102
103
|
|
103
104
|
activation_quantizers = [quantizer_class(get_trainable_quantizer_activation_config(n, aq_cand),
|
104
105
|
**qat_config.activation_quantizer_params_override)] * len(output_shapes)
|
@@ -18,7 +18,6 @@ from typing import Union
|
|
18
18
|
import numpy as np
|
19
19
|
import tensorflow as tf
|
20
20
|
from tensorflow.python.framework.tensor_shape import TensorShape
|
21
|
-
from model_compression_toolkit.constants import SIGNED
|
22
21
|
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
23
22
|
|
24
23
|
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
@@ -29,18 +28,16 @@ from mct_quantizers import QuantizationTarget, mark_quantizer
|
|
29
28
|
from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
|
30
29
|
from model_compression_toolkit import constants as C
|
31
30
|
|
32
|
-
from model_compression_toolkit.qat.keras.quantizer.
|
33
|
-
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
|
34
|
-
|
35
|
-
from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, \
|
36
|
-
ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer
|
31
|
+
from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import BaseKerasQATWeightTrainableQuantizer
|
32
|
+
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
|
33
|
+
from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
|
37
34
|
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
38
35
|
|
39
36
|
|
40
37
|
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
|
41
38
|
quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
|
42
39
|
identifier=TrainingMethod.STE)
|
43
|
-
class STEWeightQATQuantizer(
|
40
|
+
class STEWeightQATQuantizer(BaseKerasQATWeightTrainableQuantizer):
|
44
41
|
"""
|
45
42
|
Trainable constrained quantizer to quantize a layer inputs.
|
46
43
|
"""
|
@@ -171,115 +168,3 @@ class STEWeightQATQuantizer(BaseKerasQATTrainableQuantizer):
|
|
171
168
|
input_rank=len(self.threshold_shape))
|
172
169
|
|
173
170
|
|
174
|
-
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
|
175
|
-
quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
|
176
|
-
identifier=TrainingMethod.STE)
|
177
|
-
class STEActivationQATQuantizer(BaseKerasQATTrainableQuantizer):
|
178
|
-
"""
|
179
|
-
Trainable constrained quantizer to quantize a layer outputs.
|
180
|
-
"""
|
181
|
-
|
182
|
-
def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
|
183
|
-
"""
|
184
|
-
Initialize a STEActivationQATQuantizer object with parameters to use
|
185
|
-
for the quantization.
|
186
|
-
|
187
|
-
Args:
|
188
|
-
quantization_config: trainable quantizer config class
|
189
|
-
"""
|
190
|
-
super().__init__(quantization_config)
|
191
|
-
self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
|
192
|
-
self.threshold_values = quantization_config.activation_quantization_params[C.THRESHOLD]
|
193
|
-
self.threshold_shape = np.asarray(self.threshold_values).shape
|
194
|
-
self.np_threshold_values = float(self.threshold_values)
|
195
|
-
self.signed = quantization_config.activation_quantization_params[SIGNED]
|
196
|
-
if self.power_of_two:
|
197
|
-
self.np_threshold_values = np.power(2.0,
|
198
|
-
np.ceil(np.log2(np.maximum(self.np_threshold_values, C.MIN_THRESHOLD))))
|
199
|
-
self.num_bits = quantization_config.activation_n_bits
|
200
|
-
delta = self.np_threshold_values / np.power(2.0, self.num_bits - int(self.signed))
|
201
|
-
min_int = -int(self.signed) * (2 ** (self.num_bits - int(self.signed)))
|
202
|
-
max_int = (2 ** (self.num_bits - int(self.signed))) - 1
|
203
|
-
self.min = delta * min_int
|
204
|
-
self.max = delta * max_int
|
205
|
-
|
206
|
-
def initialize_quantization(self,
|
207
|
-
tensor_shape: TensorShape,
|
208
|
-
name: str,
|
209
|
-
layer: KerasTrainableQuantizationWrapper):
|
210
|
-
"""
|
211
|
-
Add quantizer parameters to the quantizer parameters dictionary
|
212
|
-
|
213
|
-
Args:
|
214
|
-
tensor_shape: tensor shape of the quantized tensor.
|
215
|
-
name: Tensor name.
|
216
|
-
layer: Layer to quantize.
|
217
|
-
"""
|
218
|
-
ptq_threshold_tensor = layer.add_weight(
|
219
|
-
name + THRESHOLD_TENSOR,
|
220
|
-
shape=(),
|
221
|
-
initializer=tf.keras.initializers.Constant(1.0),
|
222
|
-
trainable=False)
|
223
|
-
ptq_threshold_tensor.assign(self.np_threshold_values)
|
224
|
-
|
225
|
-
fq_min = layer.add_weight(
|
226
|
-
name + FQ_MIN,
|
227
|
-
shape=(),
|
228
|
-
initializer=tf.keras.initializers.Constant(-1.0),
|
229
|
-
trainable=False)
|
230
|
-
fq_min.assign(self.min)
|
231
|
-
|
232
|
-
fq_max = layer.add_weight(
|
233
|
-
name + FQ_MAX,
|
234
|
-
shape=(),
|
235
|
-
initializer=tf.keras.initializers.Constant(1.0),
|
236
|
-
trainable=False)
|
237
|
-
fq_max.assign(self.max)
|
238
|
-
|
239
|
-
# save the quantizer added parameters for later calculations
|
240
|
-
self.add_quantizer_variable(THRESHOLD_TENSOR, ptq_threshold_tensor, VariableGroup.QPARAMS)
|
241
|
-
self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS)
|
242
|
-
self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS)
|
243
|
-
|
244
|
-
|
245
|
-
def __call__(self,
|
246
|
-
inputs: tf.Tensor,
|
247
|
-
training: bool):
|
248
|
-
"""
|
249
|
-
Quantize a tensor.
|
250
|
-
Args:
|
251
|
-
inputs: Input tensor to quantize.
|
252
|
-
training: Whether the graph is in training mode.
|
253
|
-
|
254
|
-
Returns:
|
255
|
-
The quantized tensor.
|
256
|
-
"""
|
257
|
-
|
258
|
-
_min = self.get_quantizer_variable(FQ_MIN)
|
259
|
-
_max = self.get_quantizer_variable(FQ_MAX)
|
260
|
-
q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max,
|
261
|
-
num_bits=self.num_bits)
|
262
|
-
|
263
|
-
return q_tensor
|
264
|
-
|
265
|
-
def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
|
266
|
-
"""
|
267
|
-
Convert quantizer to inferable quantizer.
|
268
|
-
|
269
|
-
Returns:
|
270
|
-
BaseKerasInferableQuantizer object.
|
271
|
-
"""
|
272
|
-
|
273
|
-
if self.power_of_two:
|
274
|
-
pot_threshold = 2 ** np.ceil(np.log2(self.get_quantizer_variable(THRESHOLD_TENSOR)))
|
275
|
-
return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
|
276
|
-
# In activation quantization is per-tensor only - thus we pass
|
277
|
-
# the threshold as a list with a len of 1
|
278
|
-
threshold=[pot_threshold],
|
279
|
-
signed=self.signed)
|
280
|
-
else:
|
281
|
-
return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
|
282
|
-
# In activation quantization is per-tensor only - thus we
|
283
|
-
# pass the threshold as a list with a len of 1
|
284
|
-
threshold=[self.get_quantizer_variable(THRESHOLD_TENSOR).numpy()],
|
285
|
-
signed=self.signed)
|