mct-nightly 2.2.0.20240902.511__py3-none-any.whl → 2.2.0.20240904.449__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that registry.
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/METADATA +6 -6
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/RECORD +35 -26
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/qat/__init__.py +2 -2
- model_compression_toolkit/qat/common/qat_config.py +1 -19
- model_compression_toolkit/qat/keras/quantization_facade.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/{base_pytorch_qat_quantizer.py → base_pytorch_qat_weight_quantizer.py} +4 -13
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +6 -116
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +12 -122
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +8 -7
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +6 -84
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +6 -85
- model_compression_toolkit/trainable_infrastructure/__init__.py +9 -3
- model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +9 -8
- model_compression_toolkit/trainable_infrastructure/common/training_method.py +31 -0
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -2
- model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +2 -2
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py +19 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py +22 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/symmetric_lsq.py +111 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/uniform_lsq.py +106 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/symmetric_ste.py +108 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/uniform_ste.py +105 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +7 -14
- model_compression_toolkit/{qat/pytorch/quantizer → trainable_infrastructure/pytorch}/quantizer_utils.py +79 -2
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/top_level.txt +0 -0
model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py
@@ -17,16 +17,17 @@ from typing import List, Dict, Tuple, Callable
 from mct_quantizers import PytorchActivationQuantizationHolder, QuantizationTarget
 
 from model_compression_toolkit.core import common
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.qat.common.qat_config import QATConfig
-from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.trainable_infrastructure.common.get_quantizer_config import \
     get_trainable_quantizer_quantization_candidates, get_trainable_quantizer_weights_config, \
     get_trainable_quantizer_activation_config
-from model_compression_toolkit.qat.pytorch.quantizer.
+from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_weight_quantizer import BasePytorchQATWeightTrainableQuantizer
 from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import \
     get_trainable_quantizer_class
+from model_compression_toolkit.trainable_infrastructure.pytorch.activation_quantizers.base_activation_quantizer import \
+    BasePytorchActivationTrainableQuantizer
+
 
 def get_activation_quantizer_holder(n: common.BaseNode,
                                     qat_config: QATConfig) -> Callable:
@@ -55,8 +56,8 @@ def get_activation_quantizer_holder(n: common.BaseNode,
 def quantization_builder(n: common.BaseNode,
                          qat_config: QATConfig,
                          kernel_attr: str = None,
-                         ) -> Tuple[Dict[str,
-                                    List[
+                         ) -> Tuple[Dict[str, BasePytorchQATWeightTrainableQuantizer],
+                                    List[BasePytorchActivationTrainableQuantizer]]:
     """
     Build quantizers for a node according to its quantization configuration.
 
@@ -82,7 +83,7 @@ def quantization_builder(n: common.BaseNode,
         quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Weights,
                                                         qat_config.weight_training_method,
                                                         quant_method,
-
+                                                        BasePytorchQATWeightTrainableQuantizer)
 
         weight_quantizers.update({kernel_attr: quantizer_class(get_trainable_quantizer_weights_config(n,
                                                                                                        attr_name=kernel_attr,
@@ -95,7 +96,7 @@ def quantization_builder(n: common.BaseNode,
         quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Activation,
                                                         qat_config.activation_training_method,
                                                         quant_method,
-
+                                                        BasePytorchActivationTrainableQuantizer)
 
         activation_quantizers = [quantizer_class(get_trainable_quantizer_activation_config(n, aq_cand),
                                                  **qat_config.activation_quantizer_params_override)]
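For context, a minimal sketch of the lookup that `quantization_builder` performs with the new per-target base classes: the QAT config's training method and the node's quantization method select a registered trainable quantizer class. The argument order mirrors the calls in the hunks above; the concrete target and method values below are arbitrary examples, not taken from this diff.

```python
# Hypothetical illustration only; all imported names appear elsewhere in this diff.
from mct_quantizers import QuantizationTarget
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import get_trainable_quantizer_class
from model_compression_toolkit.trainable_infrastructure.pytorch.activation_quantizers import \
    BasePytorchActivationTrainableQuantizer

# Resolve the trainable activation quantizer registered for STE + symmetric quantization.
quantizer_cls = get_trainable_quantizer_class(QuantizationTarget.Activation,
                                              TrainingMethod.STE,
                                              QuantizationMethod.SYMMETRIC,
                                              BasePytorchActivationTrainableQuantizer)
```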
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py
@@ -18,28 +18,27 @@ import numpy as np
 import torch
 import torch.nn as nn
 
-from model_compression_toolkit.qat import TrainingMethod
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
 from mct_quantizers import PytorchQuantizationWrapper
 from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
 from model_compression_toolkit import constants as C
-from model_compression_toolkit.qat.pytorch.quantizer.
+from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_weight_quantizer import BasePytorchQATWeightTrainableQuantizer
 from mct_quantizers.common.base_inferable_quantizer import mark_quantizer, QuantizationTarget
 
 from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
-from model_compression_toolkit.
+from model_compression_toolkit.trainable_infrastructure import TrainingMethod
+from model_compression_toolkit.trainable_infrastructure.pytorch.quantizer_utils import ste_round, ste_clip
 from mct_quantizers.pytorch.quantizers import \
-    WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
-    ActivationSymmetricInferableQuantizer
+    WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
 from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
-    TrainableQuantizerWeightsConfig
+    TrainableQuantizerWeightsConfig
 from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
 
 
 @mark_quantizer(quantization_target=QuantizationTarget.Weights,
                 quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
                 identifier=TrainingMethod.STE)
-class STEWeightQATQuantizer(
+class STEWeightQATQuantizer(BasePytorchQATWeightTrainableQuantizer):
     """
     Trainable constrained quantizer to quantize a layer weights.
     """
@@ -127,80 +126,3 @@ class STEWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
                                                  threshold=np_threshold.tolist(),
                                                  per_channel=self.quantization_config.weights_per_channel_threshold,
                                                  channel_axis=self.quantization_config.weights_channels_axis)
-
-
-
-@mark_quantizer(quantization_target=QuantizationTarget.Activation,
-                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
-                identifier=TrainingMethod.STE)
-class STEActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
-    """
-    Trainable constrained quantizer to quantize a layer activations.
-    """
-
-    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
-        """
-        Initialize a STEActivationQATQuantizer object with parameters to use
-        for symmetric or power of two quantization.
-
-        Args:
-            quantization_config: trainable quantizer config class
-        """
-        super().__init__(quantization_config)
-        self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
-        self.sign = quantization_config.activation_quantization_params['is_signed']
-        np_threshold_values = quantization_config.activation_quantization_params[C.THRESHOLD]
-        self.threshold_tensor = torch.Tensor([np_threshold_values])
-        self.num_bits = quantization_config.activation_n_bits
-
-    def initialize_quantization(self,
-                                tensor_shape: torch.Size,
-                                name: str,
-                                layer: PytorchQuantizationWrapper):
-        """
-        Add quantizer parameters to the quantizer parameters dictionary
-
-        Args:
-            tensor_shape: tensor shape of the quantized tensor.
-            name: Tensor name.
-            layer: Layer to quantize.
-        """
-        layer.register_parameter(name, nn.Parameter(to_torch_tensor(self.threshold_tensor), requires_grad=True))
-
-        # save the quantizer added parameters for later calculations
-        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name), VariableGroup.QPARAMS)
-
-    def __call__(self,
-                 inputs: torch.Tensor,
-                 training: bool = True) -> torch.Tensor:
-        """
-        Quantize a tensor.
-        Args:
-            inputs: Input tensor to quantize.
-            training: Whether the graph is in training mode.
-
-        Returns:
-            The quantized tensor.
-        """
-
-        _t = self.get_quantizer_variable(THRESHOLD_TENSOR)
-        q_tensor = symmetric_quantizer(inputs, _t, self.num_bits, sign=self.sign)
-        return q_tensor
-
-    def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
-        """
-        Convert quantizer to inferable quantizer.
-
-        Returns:
-            A pytorch inferable quanizer object.
-        """
-        np_threshold = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy()
-        if self.power_of_two:
-            pot_threshold = np.power(2.0, np.ceil(np.log2(np_threshold)))
-            return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
-                                                   threshold=pot_threshold.tolist(),
-                                                   signed=self.sign)
-        else:
-            return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
-                                                         threshold=np_threshold.tolist(),
-                                                         signed=self.sign)
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py
@@ -20,26 +20,25 @@ from torch import Tensor
 from model_compression_toolkit.constants import RANGE_MAX, RANGE_MIN
 from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
 
-from model_compression_toolkit.qat import TrainingMethod
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
 from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
-from model_compression_toolkit import constants as C
 
-from model_compression_toolkit.qat.pytorch.quantizer.
+from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_weight_quantizer import BasePytorchQATWeightTrainableQuantizer
 from mct_quantizers import mark_quantizer
 from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
-from model_compression_toolkit.
+from model_compression_toolkit.trainable_infrastructure import TrainingMethod
+from model_compression_toolkit.trainable_infrastructure.pytorch.quantizer_utils import uniform_quantizer
 from mct_quantizers.pytorch.quantizers import \
-    WeightsUniformInferableQuantizer
+    WeightsUniformInferableQuantizer
 from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
-    TrainableQuantizerWeightsConfig
+    TrainableQuantizerWeightsConfig
 from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
 
 
 @mark_quantizer(quantization_target=QuantizationTarget.Weights,
                 quantization_method=[QuantizationMethod.UNIFORM],
                 identifier=TrainingMethod.STE)
-class STEUniformWeightQATQuantizer(
+class STEUniformWeightQATQuantizer(BasePytorchQATWeightTrainableQuantizer):
     """
     Trainable constrained quantizer to quantize a layer inputs.
     """
@@ -66,7 +65,6 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
                                             [-1]) if self.quantization_config.weights_per_channel_threshold else float(
             self.min_values)
 
-
     def initialize_quantization(self,
                                 tensor_shape: torch.Size,
                                 name: str,
@@ -88,7 +86,6 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
         self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
         self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
 
-
     def __call__(self,
                  inputs: nn.Parameter,
                  training: bool) -> Tensor:
@@ -117,79 +114,3 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
                                                max_range=_max.tolist(),
                                                per_channel=self.quantization_config.weights_per_channel_threshold,
                                                channel_axis=self.quantization_config.weights_channels_axis)
-
-
-@mark_quantizer(quantization_target=QuantizationTarget.Activation,
-                quantization_method=[QuantizationMethod.UNIFORM],
-                identifier=TrainingMethod.STE)
-class STEUniformActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
-    """
-    Trainable constrained quantizer to quantize a layer activations.
-    """
-
-    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
-        """
-        Initialize a STEUniformActivationQATQuantizer object with parameters to use
-        for uniform quantization.
-
-        Args:
-            quantization_config: trainable quantizer config class
-        """
-        super().__init__(quantization_config)
-
-        np_min_range = quantization_config.activation_quantization_params[C.RANGE_MIN]
-        np_max_range = quantization_config.activation_quantization_params[C.RANGE_MAX]
-        self.min_range_tensor = torch.Tensor([np_min_range])
-        self.max_range_tensor = torch.Tensor([np_max_range])
-        self.num_bits = quantization_config.activation_n_bits
-
-    def initialize_quantization(self,
-                                tensor_shape: torch.Size,
-                                name: str,
-                                layer: PytorchQuantizationWrapper):
-        """
-        Add quantizer parameters to the quantizer parameters dictionary
-
-        Args:
-            tensor_shape: tensor shape of the quantized tensor.
-            name: Tensor name.
-            layer: Layer to quantize.
-        """
-        layer.register_parameter(name+"_"+FQ_MIN, nn.Parameter(to_torch_tensor(self.min_range_tensor), requires_grad=True))
-        layer.register_parameter(name+"_"+FQ_MAX, nn.Parameter(to_torch_tensor(self.max_range_tensor), requires_grad=True))
-
-        # Save the quantizer parameters for later calculations
-        self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
-        self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
-
-    def __call__(self,
-                 inputs: torch.Tensor,
-                 training: bool = True) -> torch.Tensor:
-        """
-        Quantize a tensor.
-        Args:
-            inputs: Input tensor to quantize.
-            training: Whether the graph is in training mode.
-
-        Returns:
-            The quantized tensor.
-        """
-
-        _min = self.get_quantizer_variable(FQ_MIN)
-        _max = self.get_quantizer_variable(FQ_MAX)
-        q_tensor = uniform_quantizer(inputs, _min, _max, self.num_bits)
-        return q_tensor
-
-    def convert2inferable(self) -> ActivationUniformInferableQuantizer:
-        """
-        Convert quantizer to inferable quantizer.
-
-        Returns:
-            A pytorch inferable quanizer object.
-        """
-        _min = self.get_quantizer_variable(FQ_MIN).cpu().detach().numpy()
-        _max = self.get_quantizer_variable(FQ_MAX).cpu().detach().numpy()
-
-        return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
-                                                   min_range=_min.tolist(),
-                                                   max_range=_max.tolist())
model_compression_toolkit/trainable_infrastructure/__init__.py
@@ -14,6 +14,12 @@
 # ==============================================================================
 
 from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
-from model_compression_toolkit.trainable_infrastructure.
-from model_compression_toolkit.
-
+from model_compression_toolkit.trainable_infrastructure.common.training_method import TrainingMethod
+from model_compression_toolkit.verify_packages import FOUND_TORCH, FOUND_TF
+if FOUND_TF:
+    from model_compression_toolkit.trainable_infrastructure.keras.base_keras_quantizer import BaseKerasTrainableQuantizer
+    from model_compression_toolkit.trainable_infrastructure.keras.quantize_wrapper import KerasTrainableQuantizationWrapper
+
+if FOUND_TORCH:
+    from model_compression_toolkit.trainable_infrastructure.pytorch.base_pytorch_quantizer import BasePytorchTrainableQuantizer
+    from model_compression_toolkit.trainable_infrastructure.pytorch.activation_quantizers import *
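A short sketch of the resulting public import surface, assuming Torch is installed (the guarded imports above only expose the Keras and PyTorch symbols when the corresponding framework is found, and the activation quantizers arrive via the wildcard import):

```python
# Names re-exported by model_compression_toolkit.trainable_infrastructure after this change.
from model_compression_toolkit.trainable_infrastructure import (
    TrainingMethod,
    TrainableQuantizerWeightsConfig,
    TrainableQuantizerActivationConfig,
    BasePytorchTrainableQuantizer,              # requires torch
    STESymmetricActivationTrainableQuantizer,   # re-exported from activation_quantizers
)

print(TrainingMethod.LSQ)
```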
model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py
@@ -12,12 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from abc import abstractmethod
+from abc import ABC, abstractmethod
 from enum import Enum
 from typing import Union, List, Any
 from inspect import signature
 
-from model_compression_toolkit.core import common
 from model_compression_toolkit.logger import Logger
 
 from mct_quantizers.common.base_inferable_quantizer import BaseInferableQuantizer, \
@@ -31,6 +30,7 @@ from mct_quantizers.common.constants import QUANTIZATION_METHOD, \
 VAR = 'var'
 GROUP = 'group'
 
+
 class VariableGroup(Enum):
     """
     An enum for choosing trainable variable group
@@ -41,7 +41,7 @@ class VariableGroup(Enum):
     QPARAMS = 1
 
 
-class BaseTrainableQuantizer(BaseInferableQuantizer):
+class BaseTrainableQuantizer(BaseInferableQuantizer, ABC):
     def __init__(self,
                  quantization_config: Union[TrainableQuantizerActivationConfig, TrainableQuantizerWeightsConfig]):
         """
@@ -90,6 +90,7 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
     def get_sig(cls):
         return signature(cls)
 
+    @abstractmethod
     def initialize_quantization(self,
                                 tensor_shape,
                                 name: str,
@@ -105,8 +106,9 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
         Returns: None
 
         """
-        raise
+        raise NotImplementedError() # pragma: no cover
 
+    @abstractmethod
     def __call__(self,
                  input2quantize,
                  training: bool):
@@ -120,7 +122,7 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
         Returns:
             The quantized tensor.
         """
-        raise
+        raise NotImplementedError() # pragma: no cover
 
     def activation_quantization(self) -> bool:
         """
@@ -162,7 +164,7 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
         Returns:
             BaseInferableQuantizer object.
         """
-        raise
+        raise NotImplementedError() # pragma: no cover
 
     def add_quantizer_variable(self, name: str, variable: Any, group: VariableGroup = VariableGroup.WEIGHTS):
         """
@@ -185,7 +187,6 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
         else:
             Logger.critical(f"Variable '{name}' does not exist in quantizer parameters.") # pragma: no cover
 
-
     @abstractmethod
     def get_trainable_variables(self, group: VariableGroup) -> List[Any]:
         """
@@ -197,4 +198,4 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
         Returns:
             List of trainable variables
         """
-        raise
+        raise NotImplementedError() # pragma: no cover
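With `BaseTrainableQuantizer` now deriving from `ABC` and `initialize_quantization` / `__call__` marked `@abstractmethod`, incomplete subclasses fail at instantiation instead of raising a bare exception at call time. A generic, MCT-independent illustration of that Python behavior:

```python
from abc import ABC, abstractmethod

class DemoBase(ABC):
    @abstractmethod
    def initialize_quantization(self, tensor_shape, name, layer):
        raise NotImplementedError()

    @abstractmethod
    def __call__(self, input2quantize, training):
        raise NotImplementedError()

class Incomplete(DemoBase):
    pass

try:
    Incomplete()
except TypeError as e:
    # "Can't instantiate abstract class Incomplete with abstract methods ..."
    print(e)
```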
model_compression_toolkit/trainable_infrastructure/common/training_method.py
ADDED
@@ -0,0 +1,31 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from enum import Enum
+
+
+class TrainingMethod(Enum):
+    """
+    An enum for selecting a training method
+
+    STE - Standard straight-through estimator. Includes PowerOfTwo, symmetric & uniform quantizers
+
+    DQA - DNN Quantization with Attention. Includes a smooth quantization introduces by DQA method
+
+    LSQ - Learned Step size Quantization. Includes PowerOfTwo, symmetric & uniform quantizers: https://arxiv.org/pdf/1902.08153.pdf
+
+    """
+    STE = "STE",
+    DQA = "DQA",
+    LSQ = "LSQ"
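A hedged usage sketch of the relocated enum: `quantization_builder` reads `weight_training_method` and `activation_training_method` off the QAT config, so selecting a training method presumably looks like the following. The exact `QATConfig` constructor signature is not shown in this diff; the keyword arguments below are an assumption based on those attribute names.

```python
from model_compression_toolkit.qat.common.qat_config import QATConfig
from model_compression_toolkit.trainable_infrastructure import TrainingMethod

# Assumed keyword arguments, mirroring the attributes quantization_builder reads.
qat_config = QATConfig(weight_training_method=TrainingMethod.STE,
                       activation_training_method=TrainingMethod.LSQ)
```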
model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py
@@ -81,8 +81,8 @@ if FOUND_TF:
 
 
 else:
-    class BaseKerasTrainableQuantizer(BaseTrainableQuantizer):
+    class BaseKerasTrainableQuantizer(BaseTrainableQuantizer): # pragma: no cover
         def __init__(self, *args, **kwargs):
             Logger.critical("Tensorflow must be installed with a version of 2.15 or lower to use "
                             "BaseKerasTrainableQuantizer. The 'tensorflow' package is missing "
-                            "or is installed with a version higher than 2.15.")
+                            "or is installed with a version higher than 2.15.")
model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py
@@ -101,7 +101,7 @@ if FOUND_TF:
         return inferable_quantizers_wrapper
 
 else:
-    class KerasTrainableQuantizationWrapper:
+    class KerasTrainableQuantizationWrapper: # pragma: no cover
         def __init__(self, *args, **kwargs):
             """
             Keras Quantization Wrapper takes a keras layer and quantizers and infer a quantized layer.
@@ -112,4 +112,4 @@ else:
             """
             Logger.critical("Tensorflow must be installed with a version of 2.15 or lower to use "
                             "KerasTrainableQuantizationWrapper. The 'tensorflow' package is missing "
-                            "or is installed with a version higher than 2.15.")
+                            "or is installed with a version higher than 2.15.")
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py
ADDED
@@ -0,0 +1,19 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from .base_activation_quantizer import BasePytorchActivationTrainableQuantizer
+from .ste.symmetric_ste import STESymmetricActivationTrainableQuantizer
+from .ste.uniform_ste import STEUniformActivationTrainableQuantizer
+from .lsq.symmetric_lsq import LSQSymmetricActivationTrainableQuantizer
+from .lsq.uniform_lsq import LSQUniformActivationTrainableQuantizer
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py
ADDED
@@ -0,0 +1,22 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from abc import ABC
+
+from model_compression_toolkit.trainable_infrastructure import BasePytorchTrainableQuantizer
+
+
+class BasePytorchActivationTrainableQuantizer(BasePytorchTrainableQuantizer, ABC):
+    """ Base class for PyTorch trainable activation quantizers """
+    pass
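A hedged skeleton of a trainable activation quantizer built on this new base class (the LSQ file below is a real, complete example; the class name, method choice, and method bodies here are placeholders, and the remaining base-class contract may require additional overrides):

```python
import torch
from mct_quantizers import mark_quantizer, QuantizationTarget, QuantizationMethod, PytorchQuantizationWrapper
from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerActivationConfig, TrainingMethod
from model_compression_toolkit.trainable_infrastructure.pytorch.activation_quantizers import \
    BasePytorchActivationTrainableQuantizer


@mark_quantizer(quantization_target=QuantizationTarget.Activation,
                quantization_method=[QuantizationMethod.SYMMETRIC],
                identifier=TrainingMethod.STE)
class PlaceholderActivationQuantizer(BasePytorchActivationTrainableQuantizer):
    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
        super().__init__(quantization_config)
        self.num_bits = quantization_config.activation_n_bits

    def initialize_quantization(self, tensor_shape: torch.Size, name: str,
                                layer: PytorchQuantizationWrapper):
        # Register any trainable quantization parameters on the wrapped layer here.
        pass

    def __call__(self, inputs: torch.Tensor, training: bool = True) -> torch.Tensor:
        # Placeholder: apply fake quantization to the activations here.
        return inputs

    def convert2inferable(self):
        raise NotImplementedError()  # export to an mct_quantizers inferable quantizer
```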
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py
ADDED
@@ -0,0 +1,14 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/symmetric_lsq.py
ADDED
@@ -0,0 +1,111 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Union
+
+import numpy as np
+import torch
+from torch import nn
+
+from mct_quantizers import mark_quantizer, QuantizationTarget, QuantizationMethod, PytorchQuantizationWrapper
+from mct_quantizers.pytorch.quantizers import ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer
+from model_compression_toolkit import constants as C
+from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
+from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerActivationConfig, TrainingMethod
+from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
+from model_compression_toolkit.trainable_infrastructure.common.constants import THRESHOLD_TENSOR
+from model_compression_toolkit.trainable_infrastructure.pytorch.activation_quantizers import \
+    BasePytorchActivationTrainableQuantizer
+from model_compression_toolkit.trainable_infrastructure.pytorch.quantizer_utils import symmetric_lsq_quantizer
+
+
+# moved (and renamed) from model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py
+@mark_quantizer(quantization_target=QuantizationTarget.Activation,
+                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
+                identifier=TrainingMethod.LSQ)
+class LSQSymmetricActivationTrainableQuantizer(BasePytorchActivationTrainableQuantizer):
+    """
+    Trainable constrained quantizer to quantize layer activations.
+    """
+
+    def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
+        """
+        Initialize a LSQSymmetricActivationTrainableQuantizer object with parameters to use
+        for symmetric or power of two quantization.
+
+        Args:
+            quantization_config: trainable quantizer config class
+        """
+        super().__init__(quantization_config)
+        self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
+        self.sign = quantization_config.activation_quantization_params['is_signed']
+        self.threshold_values = np.array([quantization_config.activation_quantization_params[C.THRESHOLD]])
+        self.num_bits = quantization_config.activation_n_bits
+        n_pos_bits = self.num_bits - int(self.sign)
+        self.min_int = -int(self.sign) * (2 ** n_pos_bits)
+        self.max_int = (2 ** n_pos_bits) - 1
+
+    def initialize_quantization(self,
+                                tensor_shape: torch.Size,
+                                name: str,
+                                layer: PytorchQuantizationWrapper):
+        """
+        Add quantizer parameters to the quantizer parameters dictionary
+
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
+        """
+        layer.register_parameter(name, nn.Parameter(to_torch_tensor(self.threshold_values), requires_grad=True))
+
+        # save the quantizer added parameters for later calculations
+        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name), VariableGroup.QPARAMS)
+
+    def __call__(self,
+                 inputs: torch.Tensor,
+                 training: bool = True) -> torch.Tensor:
+        """
+        Quantize a tensor.
+        Args:
+            inputs: Input tensor to quantize.
+            training: Whether the graph is in training mode.
+
+        Returns:
+            The quantized tensor.
+        """
+
+        thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
+        n_channels = inputs.shape[1]
+        scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
+        inputs_quantized = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, self.sign, self.min_int, self.max_int, scale_factor)
+        return inputs_quantized
+
+    def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
+        """
+        Convert quantizer to inferable quantizer.
+
+        Returns:
+            A pytorch inferable quanizer object.
+        """
+        threshold_values = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy()
+        if self.power_of_two:
+            pot_threshold = np.power(2.0, np.ceil(np.log2(threshold_values)))
+            return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
+                                                   threshold=pot_threshold.tolist(),
+                                                   signed=self.sign)
+        else:
+            return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
+                                                         threshold=threshold_values.tolist(),
+                                                         signed=self.sign)
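The quantization step above is delegated to `symmetric_lsq_quantizer` in `trainable_infrastructure/pytorch/quantizer_utils.py`, which is not shown in this diff. Below is a self-contained sketch of the standard symmetric LSQ step it presumably performs (per Esser et al., arXiv:1902.08153): a trainable threshold defines the step size, rounding uses a straight-through estimator, and the threshold gradient is scaled. This is a stand-in for illustration, not the toolkit's implementation.

```python
import torch

def symmetric_lsq_sketch(x: torch.Tensor,
                         threshold: torch.Tensor,
                         num_bits: int,
                         sign: bool,
                         min_int: int,
                         max_int: int,
                         grad_scale: float) -> torch.Tensor:
    """Hypothetical stand-in for symmetric_lsq_quantizer; the real implementation may differ."""
    # Step size derived from the trainable threshold (e.g. 8-bit signed: delta = threshold / 128).
    delta = threshold / (2 ** (num_bits - int(sign)))
    # LSQ gradient scaling: forward value unchanged, backward gradient scaled by grad_scale.
    delta = (delta - grad_scale * delta).detach() + grad_scale * delta
    # Clip to the integer grid and round with a straight-through estimator.
    q = torch.clamp(x / delta, min_int, max_int)
    q = (torch.round(q) - q).detach() + q
    return q * delta

# Example: 8-bit signed fake-quantization of a random activation tensor with 16 channels.
x = torch.randn(1, 16, 8, 8)
t = torch.tensor([4.0], requires_grad=True)
y = symmetric_lsq_sketch(x, t, num_bits=8, sign=True, min_int=-128, max_int=127,
                         grad_scale=1.0 / (127 * 16) ** 0.5)
y.sum().backward()  # gradients flow back to the threshold thanks to the STE
```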