mct-nightly 2.2.0.20241012.448__py3-none-any.whl → 2.2.0.20241018.449__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/RECORD +22 -14
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/{base_keras_qat_quantizer.py → base_keras_qat_weight_quantizer.py} +3 -13
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +5 -126
- model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +4 -121
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +7 -6
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +4 -119
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +5 -95
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/__init__.py +20 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/base_activation_quantizer.py +22 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +127 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/uniform_lsq.py +129 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py +148 -0
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py +122 -0
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +12 -10
- model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py +60 -0
- {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/top_level.txt +0 -0
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py
ADDED
@@ -0,0 +1,122 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import tensorflow as tf
+from tensorflow.python.framework.tensor_shape import TensorShape
+from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
+from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
+from model_compression_toolkit.trainable_infrastructure import TrainingMethod
+
+from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget
+from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer, ActivationUniformInferableQuantizer
+
+from model_compression_toolkit.qat.keras.quantizer.quant_utils import adjust_range_to_include_zero
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
+from model_compression_toolkit import constants as C
+
+from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerActivationConfig
+from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
+from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers import BaseKerasActivationTrainableQuantizer
+
+
+@mark_quantizer(quantization_target=QuantizationTarget.Activation,
+                quantization_method=[QuantizationMethod.UNIFORM],
+                identifier=TrainingMethod.STE)
+class STEUniformActivationTrainableQuantizer(BaseKerasActivationTrainableQuantizer):
+    """
+    Trainable constrained quantizer to quantize a layer's outputs.
+    """
+
+    def __init__(self, quantization_config: TrainableQuantizerActivationConfig, freeze_quant_params: bool = False):
+        """
+        Initialize a STEUniformActivationTrainableQuantizer object with parameters to use
+        for the quantization.
+
+        Args:
+            quantization_config: trainable quantizer config class.
+            freeze_quant_params: whether to freeze learnable quantization parameters. This is unused here, since there are no quantization params to be learned.
+
+        """
+        super().__init__(quantization_config, freeze_quant_params)
+
+        self.num_bits = quantization_config.activation_n_bits
+        self.min_range = quantization_config.activation_quantization_params[C.RANGE_MIN]
+        self.max_range = quantization_config.activation_quantization_params[C.RANGE_MAX]
+
+    def initialize_quantization(self,
+                                tensor_shape: TensorShape,
+                                name: str,
+                                layer: KerasTrainableQuantizationWrapper):
+        """
+        Add quantizer parameters to the quantizer parameters dictionary.
+
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
+        """
+        fq_min = layer.add_weight(
+            name + FQ_MIN,
+            shape=(),
+            initializer=tf.keras.initializers.Constant(-1.0),
+            trainable=False)
+        fq_min.assign(self.min_range)
+
+        fq_max = layer.add_weight(
+            name + FQ_MAX,
+            shape=(),
+            initializer=tf.keras.initializers.Constant(1.0),
+            trainable=False)
+        fq_max.assign(self.max_range)
+
+        # Save the quantizer's added parameters for later calculations.
+        self.add_quantizer_variable(FQ_MIN, fq_min, VariableGroup.QPARAMS)
+        self.add_quantizer_variable(FQ_MAX, fq_max, VariableGroup.QPARAMS)
+
+    def __call__(self,
+                 inputs: tf.Tensor,
+                 training: bool):
+        """
+        Quantize a tensor.
+
+        Args:
+            inputs: Input tensor to quantize.
+            training: Whether the graph is in training mode.
+
+        Returns:
+            The quantized tensor.
+        """
+        _min = self.get_quantizer_variable(FQ_MIN)
+        _max = self.get_quantizer_variable(FQ_MAX)
+        _min, _max = adjust_range_to_include_zero(_min, _max, self.num_bits)
+        q_tensor = tf.quantization.fake_quant_with_min_max_vars(inputs, _min, _max,
+                                                                num_bits=self.num_bits)
+
+        return q_tensor
+
+    def convert2inferable(self) -> BaseKerasInferableQuantizer:
+        """
+        Convert quantizer to inferable quantizer.
+
+        Returns:
+            BaseKerasInferableQuantizer object.
+        """
+        min_range, max_range = fix_range_to_include_zero(self.get_quantizer_variable(FQ_MIN).numpy(),
+                                                         self.get_quantizer_variable(FQ_MAX).numpy(),
+                                                         self.num_bits)
+        return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
+                                                   # Activation quantization is per-tensor only, thus we pass
+                                                   # the min/max as lists of length 1.
+                                                   min_range=[min_range],
+                                                   max_range=[max_range])
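A note on the op at the core of __call__: tf.quantization.fake_quant_with_min_max_vars snaps each value onto a uniform grid spanning [min, max] (nudged so that zero lands exactly on a grid point) and uses a straight-through gradient, which is what makes this quantizer STE-trainable. A minimal standalone sketch of that behavior, independent of MCT's classes:

import tensorflow as tf

# Fake-quantize to 2**3 = 8 uniform levels over roughly [-1, 1]; out-of-range
# inputs are clipped, in-range inputs are rounded to the nearest level.
x = tf.constant([-1.5, -0.4, 0.0, 0.27, 0.9, 2.0])
q = tf.quantization.fake_quant_with_min_max_vars(x, min=-1.0, max=1.0, num_bits=3)
print(q.numpy())

In training, the gradient of q with respect to x is 1 inside the range and 0 outside it, so upstream layers keep learning through the rounding.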
model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py
CHANGED
@@ -28,16 +28,10 @@ if FOUND_TF:
     import tensorflow as tf
 
     class BaseKerasTrainableQuantizer(BaseTrainableQuantizer):
-
-
-
-
-            This class adds to the base quantizer a get_config and from_config functions to enable loading and saving the keras model.
-
-            Args:
-                quantization_config: quantizer config class contains all the information about a quantizer configuration.
-            """
-            super().__init__(quantization_config)
+        """
+        This class is a base quantizer which validates provided quantization config and defines an abstract function which any quantizer needs to implement.
+        This class adds to the base quantizer a get_config and from_config functions to enable loading and saving the keras model.
+        """
 
         def get_config(self) -> Dict[str, Any]:
             """
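The get_config/from_config pair referenced in the docstring is the standard Keras serialization contract. A hypothetical toy class (not MCT's actual implementation) showing the round trip it enables:

# ToyQuantizer is a made-up name for illustration only.
class ToyQuantizer:
    def __init__(self, num_bits: int = 8):
        self.num_bits = num_bits

    def get_config(self) -> dict:
        # Return everything needed to rebuild an equivalent object.
        return {'num_bits': self.num_bits}

    @classmethod
    def from_config(cls, config: dict) -> 'ToyQuantizer':
        return cls(**config)

restored = ToyQuantizer.from_config(ToyQuantizer(num_bits=4).get_config())
assert restored.num_bits == 4

Keras calls get_config when saving a model that contains the quantizer and from_config when loading it back.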
@@ -77,6 +71,14 @@ if FOUND_TF:
                 quantizer_parameter, parameter_group = parameter_dict[VAR], parameter_dict[GROUP]
                 if quantizer_parameter.trainable and parameter_group == group:
                     quantizer_trainable.append(quantizer_parameter)
+
+            # sanity check to catch inconsistent initialization
+            if self.freeze_quant_params and group == VariableGroup.QPARAMS and quantizer_trainable:
+                Logger.critical(
+                    'Found trainable quantization params despite self.freeze_quant_params=True. '
+                    'Quantization parameters were probably not initialized correctly in the Quantizer.'
+                )  # pragma: no cover
+
             return quantizer_trainable
 
 
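The new guard enforces an invariant rather than changing behavior: when freeze_quant_params=True, every variable registered under VariableGroup.QPARAMS should have been created with trainable=False, so finding one in quantizer_trainable can only mean the quantizer was initialized inconsistently. The mechanism it relies on is the plain TensorFlow trainable flag; a minimal demo:

import tensorflow as tf

# A variable created with trainable=False is excluded from the set of
# variables an optimizer updates, which is how quantization params are frozen.
frozen = tf.Variable(1.0, trainable=False, name='fq_min_frozen')
learned = tf.Variable(1.0, trainable=True, name='fq_min_learned')
print(frozen.trainable, learned.trainable)  # False True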
model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py
CHANGED
@@ -16,6 +16,9 @@
 import numpy as np
 import tensorflow as tf
 
+from model_compression_toolkit.qat.keras.quantizer.quant_utils import grad_scale, ste_round, \
+    adjust_range_to_include_zero
+
 
 def int_quantization_with_threshold(data: tf.Tensor,
                                     n_bits: int,
@@ -46,3 +49,60 @@ def int_quantization_with_threshold(data: tf.Tensor,
 
     return tf.clip_by_value((data / (threshold + eps)) * (2 ** (n_bits - int(signed))),
                             clip_value_max=clip_max, clip_value_min=clip_min)
+
+
+def symmetric_lsq_quantizer(x: tf.Tensor,
+                            thresholds: tf.Tensor,
+                            num_bits: int,
+                            sign: bool,
+                            min_int: int,
+                            max_int: int,
+                            scale_factor: float) -> tf.Tensor:
+    """
+    Symmetric quantizer according to the LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
+    Args:
+        x: input to quantize
+        thresholds: thresholds of quantization levels
+        num_bits: number of bits for quantization
+        sign: whether x is signed or not
+        min_int: min clipping integer value
+        max_int: max clipping integer value
+        scale_factor: grad scale of LSQ algorithm
+    Returns:
+        A quantized tensor
+    """
+    delta = thresholds / (2 ** (num_bits - int(sign)))
+    delta_scaled = grad_scale(delta, scale_factor)
+    rounded = ste_round(x / delta_scaled)
+    clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int)
+    quantized = delta_scaled * clipped
+    return quantized
+
+
+def uniform_lsq_quantizer(x: tf.Tensor,
+                          min_range: tf.Tensor,
+                          max_range: tf.Tensor,
+                          num_bits: int,
+                          min_int: int,
+                          max_int: int,
+                          scale_factor: float) -> tf.Tensor:
+    """
+    Uniform quantizer according to the LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
+    Args:
+        x: input to quantize
+        min_range: min range of quantization values
+        max_range: max range of quantization values
+        num_bits: number of bits for quantization
+        min_int: min clipping integer value
+        max_int: max clipping integer value
+        scale_factor: grad scale of LSQ algorithm
+    Returns:
+        A quantized tensor
+    """
+    min_range, max_range = adjust_range_to_include_zero(min_range, max_range, num_bits)
+    delta = (max_range - min_range) / (2 ** num_bits - 1)
+    delta_scaled = grad_scale(delta, scale_factor)
+    rounded = ste_round((x - min_range) / delta_scaled)
+    clipped = tf.math.minimum(tf.math.maximum(rounded, min_int), max_int)
+    quantized = delta_scaled * clipped + min_range
+    return quantized
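Both functions lean on two helpers imported from quant_utils whose bodies are not part of this diff. A hedged sketch of their standard LSQ formulations (the package's actual implementations may differ in detail):

import tensorflow as tf

def ste_round(x: tf.Tensor) -> tf.Tensor:
    # Forward: round(x). Backward: identity gradient (rounding is bypassed).
    return x + tf.stop_gradient(tf.round(x) - x)

def grad_scale(x: tf.Tensor, scale: float) -> tf.Tensor:
    # Forward: x unchanged. Backward: gradient multiplied by scale.
    scaled = x * scale
    return scaled + tf.stop_gradient(x - scaled)

As a quick sanity check of the symmetric path: with thresholds=1.0, num_bits=8 and sign=True, delta = 1 / 2**7 = 0.0078125, so an input of 0.1 is rounded to 13 * delta ≈ 0.1016 in the forward pass, while the backward pass treats the rounding as identity and scales the gradient flowing into the threshold by scale_factor.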
{mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/LICENSE.md
RENAMED
File without changes

{mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/WHEEL
RENAMED
File without changes

{mct_nightly-2.2.0.20241012.448.dist-info → mct_nightly-2.2.0.20241018.449.dist-info}/top_level.txt
RENAMED
File without changes