mct-nightly 1.8.0.27022023.post430-py3-none-any.whl → 1.8.0.27032023.post403-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.8.0.27022023.post430.dist-info → mct_nightly-1.8.0.27032023.post403.dist-info}/METADATA +7 -7
- {mct_nightly-1.8.0.27022023.post430.dist-info → mct_nightly-1.8.0.27032023.post403.dist-info}/RECORD +65 -59
- {mct_nightly-1.8.0.27022023.post430.dist-info → mct_nightly-1.8.0.27032023.post403.dist-info}/WHEEL +1 -1
- model_compression_toolkit/__init__.py +9 -15
- model_compression_toolkit/core/common/logger.py +10 -2
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +6 -1
- model_compression_toolkit/core/keras/quantization_facade.py +1 -1
- model_compression_toolkit/core/pytorch/constants.py +4 -0
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +4 -10
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +16 -2
- model_compression_toolkit/exporter/__init__.py +5 -0
- model_compression_toolkit/exporter/model_exporter/__init__.py +0 -3
- model_compression_toolkit/exporter/model_exporter/tflite/fakely_quant_tflite_exporter.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/__init__.py +4 -8
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +45 -39
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +39 -24
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +50 -42
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +43 -36
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +24 -5
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +25 -18
- model_compression_toolkit/gptq/__init__.py +6 -0
- model_compression_toolkit/gptq/common/gptq_config.py +60 -106
- model_compression_toolkit/gptq/common/gptq_constants.py +0 -7
- model_compression_toolkit/gptq/common/gptq_training.py +28 -38
- model_compression_toolkit/gptq/keras/gptq_training.py +10 -28
- model_compression_toolkit/gptq/keras/graph_info.py +8 -33
- model_compression_toolkit/gptq/keras/quantization_facade.py +6 -12
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +0 -1
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +2 -2
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +22 -128
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +11 -41
- model_compression_toolkit/gptq/pytorch/gptq_training.py +12 -4
- model_compression_toolkit/gptq/pytorch/graph_info.py +9 -6
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +9 -22
- model_compression_toolkit/gptq/pytorch/quantizer/__init__.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +0 -20
- model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py +10 -1
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +2 -2
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py +14 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +115 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +236 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +196 -0
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +9 -31
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +30 -37
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +27 -36
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +21 -21
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +25 -26
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_all_subclasses.py +1 -2
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_quantizers.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantize_wrapper.py +12 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/__init__.py +4 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/constants.py +1 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantize_wrapper.py +12 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/__init__.py +6 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/constants.py +3 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/base_trainable_quantizer.py +53 -2
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizers.py +2 -1
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/base_keras_quantizer.py +22 -4
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +24 -3
- model_compression_toolkit/gptq/common/gptq_quantizer_config.py +0 -93
- {mct_nightly-1.8.0.27022023.post430.dist-info → mct_nightly-1.8.0.27032023.post403.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.8.0.27022023.post430.dist-info → mct_nightly-1.8.0.27032023.post403.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/{common → pytorch/quantizers/activation_inferable_quantizers}/activation_lut_pot_inferable_quantizer.py +0 -0
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import
+from typing import Union
 
 import numpy as np
 import torch
@@ -31,6 +31,7 @@ from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructur
     ActivationSymmetricInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.trainable_quantizer_config import \
     TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
 
 
 @mark_quantizer(quantization_target=qi.QuantizationTarget.Weights,
@@ -66,22 +67,19 @@ class STEWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
         self.max_int = (2 ** n_pos_bits) - 1
         self.min = delta * self.min_int
         self.max = delta * self.max_int
-
+
 
     def initialize_quantization(self,
                                 tensor_shape: torch.Size,
                                 name: str,
-                                layer: qi.PytorchQuantizationWrapper)
+                                layer: qi.PytorchQuantizationWrapper):
         """
-        Add
-        Args:
-            tensor_shape: Tensor shape the quantizer quantize.
-            name: Prefix of variables names.
-            layer: Layer to add the variables to. The variables are saved
-            in the layer's scope.
+        Add quantizer parameters to the quantizer parameters dictionary
 
-
-
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
         """
 
         # Add threshold variables to layer.
@@ -89,9 +87,8 @@ class STEWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
                                              requires_grad=False))
 
         # save the quantizer added parameters for later calculations
-        self.
+        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name + "_" + THRESHOLD_TENSOR), VariableGroup.QPARAMS)
 
-        return self.quantizer_parameters
 
     def __call__(self,
                  inputs: nn.Parameter,
@@ -116,7 +113,7 @@ class STEWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
         Returns:
             A pytorch inferable quanizer object.
         """
-        np_threshold = self.
+        np_threshold = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy().flatten()
         if self.power_of_two:
             pot_threshold = 2 ** np.ceil(np.log2(np_threshold))
             return WeightsPOTInferableQuantizer(num_bits=self.num_bits,
@@ -153,20 +150,23 @@ class STEActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
         np_threshold_values = quantization_config.activation_quantization_params[C.THRESHOLD]
         self.threshold_tensor = torch.Tensor([np_threshold_values])
         self.num_bits = quantization_config.activation_n_bits
-        self.quantizer_parameters = {}
 
     def initialize_quantization(self,
                                 tensor_shape: torch.Size,
                                 name: str,
-                                layer: qi.PytorchQuantizationWrapper)
+                                layer: qi.PytorchQuantizationWrapper):
         """
-        Add
+        Add quantizer parameters to the quantizer parameters dictionary
+
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
         """
         layer.register_parameter(name, nn.Parameter(to_torch_tensor(self.threshold_tensor), requires_grad=True))
 
         # save the quantizer added parameters for later calculations
-        self.
-        return self.quantizer_parameters
+        self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name), VariableGroup.QPARAMS)
 
     def __call__(self,
                  inputs: torch.Tensor,
@@ -181,7 +181,7 @@ class STEActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
             The quantized tensor.
         """
 
-        _t = self.
+        _t = self.get_quantizer_variable(THRESHOLD_TENSOR)
         q_tensor = symmetric_quantizer(inputs, _t, self.num_bits, sign=self.sign)
         return q_tensor
 
@@ -192,7 +192,7 @@ class STEActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
         Returns:
             A pytorch inferable quanizer object.
         """
-        np_threshold = self.
+        np_threshold = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy()
        if self.power_of_two:
             pot_threshold = np.power(2.0, np.ceil(np.log2(np_threshold)))
             return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
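Both convert2inferable() hunks above round the learned threshold up to the next power of two before building the inferable quantizer. A quick standalone numpy check of that rounding (illustration only, not MCT code):

```python
import numpy as np

# Thresholds are rounded *up* to the nearest power of two,
# exactly as in the hunks above: 2 ** ceil(log2(t)).
np_threshold = np.array([3.2, 8.0, 0.7])
pot_threshold = 2 ** np.ceil(np.log2(np_threshold))
print(pot_threshold)  # [4. 8. 1.]
```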
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py

@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Dict
-
 import numpy as np
 import torch
 import torch.nn as nn
@@ -32,6 +30,7 @@ from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructur
     WeightsUniformInferableQuantizer, ActivationUniformInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.trainable_quantizer_config import \
     TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
 
 
 @mark_quantizer(quantization_target=qi.QuantizationTarget.Weights,
@@ -64,22 +63,18 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
                                  [-1]) if self.quantization_config.weights_per_channel_threshold else float(
             self.min_values)
 
-        self.quantizer_parameters = {}
 
     def initialize_quantization(self,
                                 tensor_shape: torch.Size,
                                 name: str,
-                                layer: qi.PytorchQuantizationWrapper)
+                                layer: qi.PytorchQuantizationWrapper):
         """
-        Add
-        Args:
-            tensor_shape: Tensor shape the quantizer quantize.
-            name: Prefix of variables names.
-            layer: Layer to add the variables to. The variables are saved
-            in the layer's scope.
+        Add quantizer parameters to the quantizer parameters dictionary
 
-
-
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
         """
 
         # Add min and max variables to layer.
@@ -87,9 +82,9 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
         layer.register_parameter(name+"_"+FQ_MAX, nn.Parameter(to_torch_tensor(self.max_values), requires_grad=False))
 
         # Save the quantizer parameters for later calculations
-        self.
+        self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
+        self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
 
-        return self.quantizer_parameters
 
     def __call__(self,
                  inputs: nn.Parameter,
@@ -102,7 +97,7 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
         Returns:
             quantized tensor
         """
-        return uniform_quantizer(inputs, self.
+        return uniform_quantizer(inputs, self.get_quantizer_variable(FQ_MIN), self.get_quantizer_variable(FQ_MAX), self.num_bits)
 
     def convert2inferable(self) -> WeightsUniformInferableQuantizer:
         """
@@ -111,8 +106,8 @@ class STEUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
         Returns:
             A pytorch inferable quanizer object.
         """
-        _min = self.
-        _max = self.
+        _min = self.get_quantizer_variable(FQ_MIN).cpu().detach().numpy()
+        _max = self.get_quantizer_variable(FQ_MAX).cpu().detach().numpy()
 
         return WeightsUniformInferableQuantizer(num_bits=self.num_bits,
                                                 min_range=_min, max_range=_max,
@@ -143,21 +138,25 @@ class STEUniformActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
         self.min_range_tensor = torch.Tensor([np_min_range])
         self.max_range_tensor = torch.Tensor([np_max_range])
         self.num_bits = quantization_config.activation_n_bits
-        self.quantizer_parameters = {}
 
     def initialize_quantization(self,
                                 tensor_shape: torch.Size,
                                 name: str,
-                                layer: qi.PytorchQuantizationWrapper)
+                                layer: qi.PytorchQuantizationWrapper):
         """
-        Add
+        Add quantizer parameters to the quantizer parameters dictionary
+
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
         """
         layer.register_parameter(name+"_"+FQ_MIN, nn.Parameter(to_torch_tensor(self.min_range_tensor), requires_grad=True))
         layer.register_parameter(name+"_"+FQ_MAX, nn.Parameter(to_torch_tensor(self.max_range_tensor), requires_grad=True))
 
         # Save the quantizer parameters for later calculations
-        self.
-
+        self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
+        self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
 
     def __call__(self,
                  inputs: torch.Tensor,
@@ -172,8 +171,8 @@ class STEUniformActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
             The quantized tensor.
         """
 
-        _min = self.
-        _max = self.
+        _min = self.get_quantizer_variable(FQ_MIN)
+        _max = self.get_quantizer_variable(FQ_MAX)
         q_tensor = uniform_quantizer(inputs, _min, _max, self.num_bits)
         return q_tensor
 
@@ -184,8 +183,8 @@ class STEUniformActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
         Returns:
             A pytorch inferable quanizer object.
         """
-        _min = self.
-        _max = self.
+        _min = self.get_quantizer_variable(FQ_MIN).cpu().detach().numpy()
+        _max = self.get_quantizer_variable(FQ_MAX).cpu().detach().numpy()
 
         return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
                                                    min_range=_min, max_range=_max)
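The uniform quantizers above delegate the math to uniform_quantizer(inputs, _min, _max, num_bits), whose body is not part of this diff. A minimal sketch of what such a uniform fake-quantizer typically computes, assuming the standard min/max grid formulation (the exact MCT implementation may differ):

```python
import torch

def uniform_fake_quant(x: torch.Tensor, qmin: torch.Tensor, qmax: torch.Tensor, num_bits: int) -> torch.Tensor:
    # Assumed formulation for illustration only, not the MCT implementation.
    delta = (qmax - qmin) / (2 ** num_bits - 1)   # step size of the uniform grid
    q = torch.round((x - qmin) / delta)           # index on the grid
    q = torch.clamp(q, 0, 2 ** num_bits - 1)      # stay inside the representable range
    return q * delta + qmin                       # back to real values

x = torch.tensor([-1.0, 0.3, 2.5])
print(uniform_fake_quant(x, torch.tensor(0.0), torch.tensor(2.0), 8))
# values below qmin / above qmax clamp to 0.0 / 2.0
```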
model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_all_subclasses.py

@@ -28,5 +28,4 @@ def get_all_subclasses(cls: type) -> Set[type]:
 
     """
 
-    return set(cls.__subclasses__()).union(
-        [s for c in cls.__subclasses__() for s in get_all_subclasses(c)])
+    return set(cls.__subclasses__()).union([s for c in cls.__subclasses__() for s in get_all_subclasses(c)])
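The reflowed one-liner is behaviorally identical: it recurses through the whole subclass tree, so indirect subclasses are collected too. A tiny runnable check (the A/B/C classes are illustrative):

```python
from typing import Set

def get_all_subclasses(cls: type) -> Set[type]:
    # same expression as the new line above
    return set(cls.__subclasses__()).union([s for c in cls.__subclasses__() for s in get_all_subclasses(c)])

class A: pass
class B(A): pass
class C(B): pass   # grandchild of A

assert get_all_subclasses(A) == {B, C}
```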
model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_quantizers.py

@@ -41,7 +41,7 @@ def get_inferable_quantizer_class(quant_target: QuantizationTarget,
     qat_quantizer_classes = get_all_subclasses(quantizer_base_class)
     filtered_quantizers = list(filter(lambda q_class: getattr(q_class, QUANTIZATION_TARGET) == quant_target and
                                                       getattr(q_class, QUANTIZATION_METHOD) is not None and
-
+                                                      quant_method in getattr(q_class, QUANTIZATION_METHOD),
                                       qat_quantizer_classes))
 
     if len(filtered_quantizers) != 1:
model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantize_wrapper.py

@@ -314,6 +314,18 @@ if FOUND_TF:
 
             return self._weights_vars
 
+        def get_quantized_weights(self) -> Dict[str, tf.Tensor]:
+            """
+
+            Returns: A dictionary of weights attributes to quantized weights.
+
+            """
+            quantized_weights = {}
+            weights_var = self.get_weights_vars()
+            for name, w, quantizer in weights_var:
+                quantized_weights[name] = quantizer(w)
+            return quantized_weights
+
 else:
     class KerasQuantizationWrapper(object):
         def __init__(self,
model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/__init__.py

@@ -18,6 +18,10 @@ from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructur
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.weights_inferable_quantizers.weights_pot_inferable_quantizer import WeightsPOTInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.weights_inferable_quantizers.weights_symmetric_inferable_quantizer import WeightsSymmetricInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.weights_inferable_quantizers.weights_uniform_inferable_quantizer import WeightsUniformInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.weights_inferable_quantizers.weights_lut_symmetric_inferable_quantizer import WeightsLUTSymmetricInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.weights_inferable_quantizers.weights_lut_pot_inferable_quantizer import WeightsLUTPOTInferableQuantizer
+
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.activation_inferable_quantizers.activation_pot_inferable_quantizer import ActivationPOTInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.activation_inferable_quantizers.activation_symmetric_inferable_quantizer import ActivationSymmetricInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.activation_inferable_quantizers.activation_uniform_inferable_quantizer import ActivationUniformInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.activation_inferable_quantizers.activation_lut_pot_inferable_quantizer import ActivationLutPOTInferableQuantizer
model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantize_wrapper.py

@@ -240,6 +240,18 @@ if FOUND_TORCH:
 
             return outputs
 
+        def get_quantized_weights(self) -> Dict[str, torch.Tensor]:
+            """
+
+            Returns: A dictionary of weights attributes to quantized weights.
+
+            """
+            quantized_weights = {}
+            weights_var = self.get_weights_vars()
+            for name, w, quantizer in weights_var:
+                quantized_weights[name] = quantizer(w)
+            return quantized_weights
+
 else:
     class PytorchQuantizationWrapper(object):
         def __init__(self,
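Both wrappers gain the same accessor: iterate get_weights_vars() and apply each weight's quantizer to its float weight. A standalone stub mirroring the added PyTorch method (StubWrapper and its rounding lambda are hypothetical stand-ins, not MCT classes):

```python
from typing import Dict
import torch

class StubWrapper:
    """Hypothetical stand-in for PytorchQuantizationWrapper."""

    def __init__(self):
        w = torch.randn(3, 3)
        # (attribute name, float weight, quantizer callable) triples,
        # the same shape get_weights_vars() returns in the real wrapper
        self._weights_vars = [('weight', w, lambda t: torch.round(t * 4) / 4)]

    def get_weights_vars(self):
        return self._weights_vars

    def get_quantized_weights(self) -> Dict[str, torch.Tensor]:
        # identical loop to the method added in the hunk above
        quantized_weights = {}
        for name, w, quantizer in self.get_weights_vars():
            quantized_weights[name] = quantizer(w)
        return quantized_weights

print(StubWrapper().get_quantized_weights()['weight'])
```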
model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/__init__.py

@@ -19,6 +19,8 @@ from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructur
     import ActivationSymmetricInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.activation_inferable_quantizers.activation_uniform_inferable_quantizer \
     import ActivationUniformInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.activation_inferable_quantizers.activation_lut_pot_inferable_quantizer \
+    import ActivationLutPOTInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.base_pytorch_inferable_quantizer \
     import BasePyTorchInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.weights_inferable_quantizers.weights_pot_inferable_quantizer \
@@ -27,3 +29,7 @@ from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructur
     import WeightsSymmetricInferableQuantizer
 from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.weights_inferable_quantizers.weights_uniform_inferable_quantizer \
     import WeightsUniformInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.weights_inferable_quantizers.weights_lut_symmetric_inferable_quantizer \
+    import WeightsLUTSymmetricInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.pytorch.quantizers.weights_inferable_quantizers.weights_lut_pot_inferable_quantizer \
+    import WeightsLUTPOTInferableQuantizer
model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/base_trainable_quantizer.py

@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
-from
+from abc import abstractmethod
+from enum import Enum
+from typing import Union, List, Any
 from inspect import signature
 
 from model_compression_toolkit.core import common
@@ -27,6 +28,19 @@ from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructur
     QUANTIZATION_TARGET
 
 
+VAR = 'var'
+GROUP = 'group'
+
+class VariableGroup(Enum):
+    """
+    An enum for choosing trainable variable group
+    0. WEIGHTS
+    1. QPARAMS
+    """
+    WEIGHTS = 0
+    QPARAMS = 1
+
+
 class BaseTrainableQuantizer(BaseInferableQuantizer):
     def __init__(self,
                  quantization_config: Union[TrainableQuantizerActivationConfig, TrainableQuantizerWeightsConfig]):
@@ -70,6 +84,8 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
             common.Logger.error(
                 f'Unknown Quantization Part:{static_quantization_target}') # pragma: no cover
 
+        self.quantizer_parameters = {}
+
     @classmethod
     def get_sig(cls):
         return signature(cls)
@@ -147,3 +163,38 @@ class BaseTrainableQuantizer(BaseInferableQuantizer):
             BaseInferableQuantizer object.
         """
         raise NotImplemented # pragma: no cover
+
+    def add_quantizer_variable(self, name: str, variable: Any, group: VariableGroup = VariableGroup.WEIGHTS):
+        """
+        Add a quantizer variable to quantizer_parameters dictionary
+        """
+        self.quantizer_parameters.update({name: {VAR: variable, GROUP: group}})
+
+    def get_quantizer_variable(self, name: str) -> Any:
+        """
+        Get a quantizer variable by name
+
+        Args:
+            name: variable name
+
+        Returns:
+            trainable variable
+        """
+        if name in self.quantizer_parameters:
+            return self.quantizer_parameters[name][VAR]
+        else:
+            common.Logger.error(f'Variable {name} is not exist in quantizers parameters!') # pragma: no cover
+
+
+    @abstractmethod
+    def get_trainable_variables(self, group: VariableGroup) -> List[Any]:
+        """
+        Get trainable parameters with specific group from quantizer
+
+        Args:
+            group: Enum of variable group
+
+        Returns:
+            List of trainable variables
+        """
+        raise NotImplemented # pragma: no cover
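Taken together, these hunks replace the old pattern of each quantizer returning its own parameter dict from initialize_quantization() with a dict owned by the base class and accessed through add_quantizer_variable() / get_quantizer_variable(). A condensed standalone sketch of just that mechanism (the toy class is illustrative, not the MCT base class):

```python
from enum import Enum
from typing import Any

VAR = 'var'
GROUP = 'group'

class VariableGroup(Enum):
    WEIGHTS = 0
    QPARAMS = 1

class SketchTrainableQuantizer:
    def __init__(self):
        self.quantizer_parameters = {}

    def add_quantizer_variable(self, name: str, variable: Any,
                               group: VariableGroup = VariableGroup.WEIGHTS):
        # same {VAR: ..., GROUP: ...} structure as the base-class method above
        self.quantizer_parameters.update({name: {VAR: variable, GROUP: group}})

    def get_quantizer_variable(self, name: str) -> Any:
        return self.quantizer_parameters[name][VAR]

q = SketchTrainableQuantizer()
q.add_quantizer_variable('threshold', 8.0, VariableGroup.QPARAMS)
assert q.get_quantizer_variable('threshold') == 8.0
```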
model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizers.py

@@ -14,7 +14,8 @@
 # ==============================================================================
 from typing import Union
 
-from model_compression_toolkit import
+from model_compression_toolkit.gptq import RoundingType
+from model_compression_toolkit import TrainingMethod
 from model_compression_toolkit.core.common import Logger
 from model_compression_toolkit.core.common.target_platform import QuantizationMethod
 from model_compression_toolkit.quantizers_infrastructure import QuantizationTarget
model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/base_keras_quantizer.py

@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Dict, Any, Union
+from typing import Dict, Any, Union, List
 
 from model_compression_toolkit.core.common import Logger
 from model_compression_toolkit.core.common.constants import FOUND_TF
-
-from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer, VAR, GROUP
 from model_compression_toolkit.quantizers_infrastructure import TrainableQuantizerWeightsConfig, \
     TrainableQuantizerActivationConfig
 
@@ -25,7 +25,7 @@ if FOUND_TF:
     QUANTIZATION_CONFIG = 'quantization_config'
     from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.keras.config_serialization import config_serialization, \
         config_deserialization
-
+    import tensorflow as tf
 
     class BaseKerasTrainableQuantizer(BaseTrainableQuantizer):
         def __init__(self,
@@ -61,6 +61,24 @@ if FOUND_TF:
             # Note that a quantizer only receive quantization config and the rest of define hardcoded inside the speficie quantizer.
             return cls(quantization_config=quantization_config)
 
+        def get_trainable_variables(self, group: VariableGroup) -> List[tf.Tensor]:
+            """
+            Get trainable parameters with specific group from quantizer
+
+            Args:
+                group: Enum of variable group
+
+            Returns:
+                List of trainable variables
+            """
+            quantizer_trainable = []
+            for name, parameter_dict in self.quantizer_parameters.items():
+                quantizer_parameter, parameter_group = parameter_dict[VAR], parameter_dict[GROUP]
+                if quantizer_parameter.trainable and parameter_group == group:
+                    quantizer_trainable.append(quantizer_parameter)
+            return quantizer_trainable
+
+
 else:
     class BaseKerasTrainableQuantizer(BaseTrainableQuantizer):
         def __init__(self,
model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/base_pytorch_quantizer.py

@@ -12,17 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Union
+from typing import Union, List
 
 from model_compression_toolkit.core.common.logger import Logger
 from model_compression_toolkit.core.common.constants import FOUND_TORCH
-
-from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer, VAR, GROUP
 from model_compression_toolkit.quantizers_infrastructure import TrainableQuantizerWeightsConfig, \
     TrainableQuantizerActivationConfig
 
+
 if FOUND_TORCH:
 
+    import torch
+
     class BasePytorchTrainableQuantizer(BaseTrainableQuantizer):
         def __init__(self,
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
@@ -35,6 +38,24 @@ if FOUND_TORCH:
             """
             super().__init__(quantization_config)
 
+
+        def get_trainable_variables(self, group: VariableGroup) -> List[torch.Tensor]:
+            """
+            Get trainable parameters with specific group from quantizer
+
+            Args:
+                group: Enum of variable group
+
+            Returns:
+                List of trainable variables
+            """
+            quantizer_trainable = []
+            for name, parameter_dict in self.quantizer_parameters.items():
+                quantizer_parameter, parameter_group = parameter_dict[VAR], parameter_dict[GROUP]
+                if quantizer_parameter.requires_grad and parameter_group == group:
+                    quantizer_trainable.append(quantizer_parameter)
+            return quantizer_trainable
+
 else:
     class BasePytorchTrainableQuantizer(BaseTrainableQuantizer):
         def __init__(self,
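The WEIGHTS/QPARAMS split surfaced by get_trainable_variables() makes it straightforward to train quantizer parameters and network weights separately. The snippet below is a hedged illustration of that design choice; the optimizer types and learning rates are assumptions, not values taken from MCT:

```python
import torch

# Stand-ins for what get_trainable_variables(VariableGroup.QPARAMS) /
# get_trainable_variables(VariableGroup.WEIGHTS) would return.
qparams = [torch.nn.Parameter(torch.tensor([8.0]))]
weights = [torch.nn.Parameter(torch.randn(3, 3))]

opt_qparams = torch.optim.Adam(qparams, lr=1e-3)   # quantizer parameters
opt_weights = torch.optim.SGD(weights, lr=1e-4)    # network weights
```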
model_compression_toolkit/gptq/common/gptq_quantizer_config.py (deleted)

@@ -1,93 +0,0 @@
-# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from typing import Any, List, Callable
-
-from model_compression_toolkit.core.common import Logger
-from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, REGULARIZATION_VALUES
-
-
-class GPTQQuantizerConfig:
-    """
-    A base class to define specific quantizer configuration for GPTQ quantizer.
-    """
-
-    def __init__(self):
-        self.n_batches = None
-
-    def get_regularization_value(self, fxp_model: Any, **kwargs) -> float:
-        """
-        Computes a regularization value for the quantizer's loss (if needed).
-        In the base class it only returns 0, to be used for GPTQ quantizers that don't require regularization.
-
-        Args:
-            fxp_model: The quantized model that is being trained.
-            **kwargs: Additional arguments for the quantizer regularization computation.
-
-        Returns: The regularization value.
-        """
-
-        return 0
-
-    def set_num_batches(self, num_batches: int):
-        """
-        Allows to set the number of batches that the quantizer uses for training (in each epoch).
-
-        Args:
-            num_batches: number of batches to be set.
-
-        """
-        self.n_batches = num_batches
-
-
-class SoftQuantizerConfig(GPTQQuantizerConfig):
-    def __init__(self, entropy_regularization: float = REG_DEFAULT):
-        """
-        Initializes an object that holds the arguments that are needed for soft rounding quantizer.
-
-        Args:
-            entropy_regularization (float): A floating point number that defines the gumbel entropy regularization factor.
-        """
-
-        super().__init__()
-        self.entropy_regularization = entropy_regularization
-
-
-    def get_regularization_value(self, fxp_model: Any, **kwargs) -> float:
-        """
-        Computes a regularization value for the soft quantizer.
-
-        Args:
-            fxp_model: The quantized model that is being trained.
-            **kwargs: Additional arguments for the quantizer regularization computation.
-
-        Returns: The regularization value.
-        """
-
-        soft_rounding_reg_values = kwargs.get(REGULARIZATION_VALUES)
-
-        if soft_rounding_reg_values is None:
-            Logger.error("No regularization values has been provided for computing the regularization " # pragma: no cover
-                         "of the soft quantizer.")
-        if not isinstance(soft_rounding_reg_values, List):
-            Logger.error("The provided regularization values parameter of the soft quantizer " # pragma: no cover
-                         "is not compatible (should be a list).")
-
-        reg = 0
-
-        for sq in soft_rounding_reg_values:
-            reg += sq
-
-        return self.entropy_regularization * reg
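Judging by the file list at the top, the regularization logic removed with this config class appears to move into the new per-framework modules added in this release: gptq/keras/quantizer/regularization_factory.py, gptq/pytorch/quantizer/regularization_factory.py, and the soft_rounding/soft_quantizer_reg.py files.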