mct-nightly 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registry.
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/METADATA +16 -16
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/RECORD +193 -150
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/WHEEL +1 -1
- model_compression_toolkit/__init__.py +13 -14
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +1 -1
- model_compression_toolkit/core/common/collectors/base_collector.py +7 -4
- model_compression_toolkit/core/common/collectors/statistics_collector.py +2 -2
- model_compression_toolkit/core/common/constants.py +9 -4
- model_compression_toolkit/core/common/framework_implementation.py +32 -30
- model_compression_toolkit/core/common/graph/base_graph.py +8 -6
- model_compression_toolkit/core/common/logger.py +10 -2
- model_compression_toolkit/core/common/matchers/base_matcher.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +2 -1
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +6 -1
- model_compression_toolkit/core/common/model_validation.py +2 -1
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +7 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +4 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -17
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +9 -2
- model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +5 -4
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +3 -3
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +7 -0
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +13 -8
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +17 -12
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/target_platform/current_tp_model.py +3 -1
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/attribute_filter.py +17 -4
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/operations_to_layers.py +2 -4
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/target_platform_capabilities.py +3 -5
- model_compression_toolkit/core/keras/back2framework/instance_builder.py +12 -21
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +40 -14
- model_compression_toolkit/core/keras/back2framework/model_gradients.py +51 -27
- model_compression_toolkit/core/keras/constants.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +2 -1
- model_compression_toolkit/core/keras/kpi_data_facade.py +2 -2
- model_compression_toolkit/core/keras/quantization_facade.py +3 -3
- model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +15 -9
- model_compression_toolkit/core/keras/quantizer/input_layer_quantize_transform.py +2 -1
- model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py +1 -1
- model_compression_toolkit/core/keras/reader/common.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/instance_builder.py +14 -1
- model_compression_toolkit/core/pytorch/back2framework/model_gradients.py +88 -46
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +27 -12
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/wrapper_quantize_config.py +2 -3
- model_compression_toolkit/core/pytorch/constants.py +5 -0
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +9 -14
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +16 -2
- model_compression_toolkit/core/pytorch/kpi_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/quantization_facade.py +2 -2
- model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py +7 -5
- model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py +1 -1
- model_compression_toolkit/core/tpc_models/get_target_platform_capabilities.py +6 -2
- model_compression_toolkit/{exporter/model_wrapper/keras/quantize_configs → core/tpc_models/imx500_tpc}/__init__.py +1 -1
- model_compression_toolkit/core/tpc_models/imx500_tpc/latest/__init__.py +24 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/target_platform_capabilities.py +45 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/__init__.py +16 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tp_model.py +156 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tpc_keras.py +101 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tpc_pytorch.py +95 -0
- model_compression_toolkit/exporter/__init__.py +5 -0
- model_compression_toolkit/exporter/model_exporter/__init__.py +0 -12
- model_compression_toolkit/exporter/model_exporter/fw_agonstic/exporter.py +1 -1
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +12 -39
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +39 -27
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +10 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py +6 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +48 -35
- model_compression_toolkit/exporter/model_exporter/tflite/fakely_quant_tflite_exporter.py +3 -2
- model_compression_toolkit/exporter/model_exporter/tflite/int8_tflite_exporter.py +180 -0
- model_compression_toolkit/exporter/model_exporter/tflite/tflite_export_facade.py +44 -26
- model_compression_toolkit/exporter/model_wrapper/__init__.py +4 -4
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +34 -137
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +143 -0
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizers.py +46 -0
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +56 -22
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +29 -112
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +83 -79
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizers.py +47 -0
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +44 -0
- model_compression_toolkit/gptq/__init__.py +6 -0
- model_compression_toolkit/gptq/common/gptq_config.py +57 -127
- model_compression_toolkit/gptq/common/gptq_constants.py +20 -6
- model_compression_toolkit/gptq/common/gptq_graph.py +22 -0
- model_compression_toolkit/gptq/common/gptq_training.py +32 -26
- model_compression_toolkit/gptq/keras/gptq_loss.py +1 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +73 -39
- model_compression_toolkit/gptq/keras/graph_info.py +24 -43
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -18
- model_compression_toolkit/gptq/keras/quantizer/__init__.py +2 -1
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/quant_utils.py +13 -14
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +78 -0
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/gptq/keras/{optimizers → quantizer/soft_rounding}/__init__.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +256 -0
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +68 -168
- model_compression_toolkit/gptq/pytorch/gptq_training.py +78 -39
- model_compression_toolkit/gptq/pytorch/graph_info.py +81 -0
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +12 -18
- model_compression_toolkit/gptq/pytorch/quantizer/__init__.py +5 -1
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +92 -0
- model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py +10 -119
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +75 -0
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/{exporter/model_wrapper/keras/quantizers → gptq/pytorch/quantizer/soft_rounding}/__init__.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +115 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +244 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +196 -0
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +182 -0
- model_compression_toolkit/ptq/keras/quantization_facade.py +3 -3
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -6
- model_compression_toolkit/qat/common/qat_config.py +68 -0
- model_compression_toolkit/qat/keras/quantization_facade.py +55 -48
- model_compression_toolkit/qat/keras/quantizer/__init__.py +3 -0
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +49 -0
- model_compression_toolkit/qat/keras/quantizer/quant_utils.py +48 -0
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +77 -0
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +283 -0
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +158 -46
- model_compression_toolkit/qat/pytorch/quantization_facade.py +190 -11
- model_compression_toolkit/qat/pytorch/quantizer/__init__.py +17 -0
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +49 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +74 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +136 -0
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +204 -0
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +190 -0
- model_compression_toolkit/quantizers_infrastructure/__init__.py +23 -0
- model_compression_toolkit/{gptq/keras/quantizer/configs → quantizers_infrastructure/inferable_infrastructure}/__init__.py +1 -1
- model_compression_toolkit/{gptq/keras/quantizer/gumbel_rounding → quantizers_infrastructure/inferable_infrastructure/common}/__init__.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/base_inferable_quantizer.py +87 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/constants.py +41 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_all_subclasses.py +31 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_quantizers.py +53 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/quant_utils.py +49 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/__init__.py +14 -0
- model_compression_toolkit/{qunatizers_infrastructure → quantizers_infrastructure/inferable_infrastructure}/keras/load_model.py +26 -8
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantize_wrapper.py +345 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizer_utils.py +85 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/__init__.py +27 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +148 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +65 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +86 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +111 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/base_keras_inferable_quantizer.py +56 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/constants.py +25 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +79 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +179 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +67 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +87 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +163 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/validation_functions.py +66 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantize_wrapper.py +269 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizer_utils.py +152 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/__init__.py +35 -0
- model_compression_toolkit/{exporter/model_wrapper/pytorch/quantizers → quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers}/__init__.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +97 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +62 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +83 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +100 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_lut_symmetric_inferable_quantizer.py +95 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_pytorch_inferable_quantizer.py +48 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_symmetric_inferable_quantizer.py +70 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_uniform_inferable_quantizer.py +57 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/constants.py +26 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +77 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +106 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +66 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +104 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +109 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/base_trainable_quantizer.py +200 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizer_config.py +116 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizers.py +65 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/quant_utils.py +36 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/trainable_quantizer_config.py +97 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/base_keras_quantizer.py +90 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/config_serialization.py +80 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/quantizer_utils.py +48 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +66 -0
- model_compression_toolkit/exporter/model_wrapper/keras/builder/quantize_config_to_node.py +0 -66
- model_compression_toolkit/exporter/model_wrapper/keras/builder/quantizer_to_node.py +0 -134
- model_compression_toolkit/exporter/model_wrapper/keras/extended_quantize_wrapper.py +0 -81
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/activation_quantize_config.py +0 -81
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/weights_activation_quantize_config.py +0 -128
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/weights_quantize_config.py +0 -107
- model_compression_toolkit/exporter/model_wrapper/keras/quantizers/fq_quantizer.py +0 -99
- model_compression_toolkit/exporter/model_wrapper/keras/quantizers/weights_uniform_quantizer.py +0 -105
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantize_config.py +0 -61
- model_compression_toolkit/exporter/model_wrapper/pytorch/quantizers/fq_quantizer.py +0 -59
- model_compression_toolkit/exporter/model_wrapper/pytorch/quantizers/uniform_weights_quantizer.py +0 -67
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/activation_quantize_config.py +0 -52
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/no_quantization_quantize_config.py +0 -46
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/weights_activation_quantize_config.py +0 -54
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/weights_quantize_config.py +0 -52
- model_compression_toolkit/gptq/keras/gptq_model_builder.py +0 -104
- model_compression_toolkit/gptq/keras/optimizers/sam_optimizer.py +0 -119
- model_compression_toolkit/gptq/keras/quantizer/config_factory.py +0 -62
- model_compression_toolkit/gptq/keras/quantizer/configs/base_quantizer_gptq_config.py +0 -65
- model_compression_toolkit/gptq/keras/quantizer/configs/weight_quantizer_gptq_config.py +0 -269
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/base_gumbel_rounding.py +0 -263
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/gumbel_softmax.py +0 -75
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/symmetric_gumbel.py +0 -266
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/uniform_gumbel.py +0 -247
- model_compression_toolkit/gptq/keras/quantizer/kernel_functions.py +0 -50
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/uniform_ste.py +0 -49
- model_compression_toolkit/gptq/pytorch/gptq_graph_info.py +0 -94
- model_compression_toolkit/gptq/pytorch/gptq_model_builder.py +0 -113
- model_compression_toolkit/gptq/pytorch/quantizer/gptq_quantizer.py +0 -71
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/__init__.py +0 -14
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/base_gumbel_weights_quantizer.py +0 -157
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/sym_gumbel_weights_quantizer.py +0 -150
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/uniform_gumbel_weights_quantizer.py +0 -143
- model_compression_toolkit/gptq/pytorch/quantizer/quantizer_wrapper.py +0 -103
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/ste_weights_quantizer.py +0 -103
- model_compression_toolkit/qat/keras/qat_model_builder.py +0 -105
- model_compression_toolkit/qat/keras/quantizer/quantization_dispatcher_builder.py +0 -56
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetirc_ste.py +0 -145
- model_compression_toolkit/qunatizers_infrastructure/__init__.py +0 -8
- model_compression_toolkit/qunatizers_infrastructure/common/__init__.py +0 -14
- model_compression_toolkit/qunatizers_infrastructure/common/base_quantizer.py +0 -123
- model_compression_toolkit/qunatizers_infrastructure/common/node_quantization_dispatcher.py +0 -65
- model_compression_toolkit/qunatizers_infrastructure/keras/__init__.py +0 -14
- model_compression_toolkit/qunatizers_infrastructure/keras/base_keras_quantizer.py +0 -75
- model_compression_toolkit/qunatizers_infrastructure/keras/config_serialization.py +0 -83
- model_compression_toolkit/qunatizers_infrastructure/keras/keras_node_quantization_dispatcher.py +0 -74
- model_compression_toolkit/qunatizers_infrastructure/keras/quantize_wrapper.py +0 -194
- model_compression_toolkit/qunatizers_infrastructure/pytorch/__init__.py +0 -0
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/{exporter/model_wrapper/pytorch/wrappers_quantize_configs → qat/pytorch/quantizer/ste_rounding}/__init__.py +0 -0
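The hunks reproduced below all come from the Keras GPTQ flow, which this release reworks around the new quantizers_infrastructure package: the Gumbel-rounding quantizers and SAM optimizer are removed, STE and soft-rounding quantizers take their place, and the GPTQ facade moves under the mct.gptq namespace. A usage sketch of the relocated API, based on the updated docstrings in quantization_facade.py below (model and repr_datagen are placeholders for your Keras model and representative dataset generator):

    import model_compression_toolkit as mct

    # The GPTQ config builder now lives under the mct.gptq namespace.
    gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=5)

    # So does the GPTQ quantization facade itself.
    quantized_model, quantization_info = \
        mct.gptq.keras_gradient_post_training_quantization_experimental(
            model, repr_datagen, gptq_config)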
model_compression_toolkit/gptq/keras/gptq_training.py +73 -39

@@ -12,16 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Callable, List, Tuple
+from typing import Callable, List, Tuple, Union

 import tensorflow as tf
-from
+from keras import Model
+from tensorflow.keras.layers import Layer
 from tqdm import tqdm

 # As from Tensorflow 2.6, keras is a separate package and some classes should be imported differently.
-from model_compression_toolkit.
+from model_compression_toolkit.core.common.user_info import UserInformation
+from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
 from packaging import version

+from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
+from model_compression_toolkit.gptq.keras.quantizer.quantization_builder import quantization_builder
+from model_compression_toolkit.quantizers_infrastructure import KerasQuantizationWrapper
+
 if version.parse(tf.__version__) < version.parse("2.6"):
     from tensorflow.python.keras.engine.base_layer import TensorFlowOpLayer
 else:

@@ -31,15 +37,14 @@ from model_compression_toolkit.core import common
 from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
 from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
 from model_compression_toolkit.core.common import Graph
-from model_compression_toolkit.gptq.keras.graph_info import
-
+from model_compression_toolkit.gptq.keras.graph_info import get_weights_for_loss, get_gptq_trainable_parameters
+from model_compression_toolkit.gptq.keras.quantizer.regularization_factory import get_regularization
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 import numpy as np
 import copy
 from model_compression_toolkit.core.keras.constants import BIAS, USE_BIAS
-from model_compression_toolkit
-from model_compression_toolkit.gptq.keras.optimizers.sam_optimizer import SAM
+from model_compression_toolkit import quantizers_infrastructure as qi


 class KerasGPTQTrainer(GPTQTrainer):

@@ -77,11 +82,10 @@ class KerasGPTQTrainer(GPTQTrainer):
         self.loss_list = []
         self.input_scale = 1

-        trainable_weights, bias_weights, trainable_threshold
+        trainable_weights, bias_weights, trainable_threshold = get_gptq_trainable_parameters(
             self.fxp_model,
             fw_info,
-            add_bias=gptq_config.train_bias
-            is_gumbel=gptq_config.is_gumbel)
+            add_bias=gptq_config.train_bias)

         self.flp_weights_list, self.fxp_weights_list = get_weights_for_loss(self.fxp_model)

@@ -96,29 +100,70 @@ class KerasGPTQTrainer(GPTQTrainer):
         trainable_quantization_parameters = trainable_threshold
         self.optimizer_with_param = self.get_optimizer_with_param(flattened_trainable_weights,
                                                                   flattened_bias_weights,
-                                                                  trainable_quantization_parameters
-
-
+                                                                  trainable_quantization_parameters)
+        self.has_params_to_train = np.sum(
+            [len(optimizer_params_tuple[1]) for optimizer_params_tuple in self.optimizer_with_param]) > 0

         if self.float_user_info.input_scale != self.gptq_user_info.input_scale:
             common.Logger.error("Input scale mismatch between float and GPTQ networks")  # pragma: no cover
         else:
             self.input_scale = self.gptq_user_info.input_scale

-        self.weights_for_average_loss = self.
+        self.weights_for_average_loss = self.compute_hessian_based_weights(representative_data_gen)
+
+        self.reg_func = get_regularization(self.gptq_config, representative_data_gen)

-    def
+    def _is_gptq_applicable(self,
+                            node: common.BaseNode) -> bool:
+        """
+        A function for deciding if a layer should be fine-tuned during GPTQ.
+
+        Args:
+            node (BaseNode): Node for quantization decision
+
+        Returns:
+            A boolean whether the layer is to be wrapped with a QuantizeWrapper
+        """
+
+        if node.is_weights_quantization_enabled() and not self.fw_info.is_kernel_op(node.type):
+            common.Logger.error(f"GPTQ Error: Quantizing node {node.name} of type {node.type} "
+                                f"without a kernel isn't supported")
+        return node.is_weights_quantization_enabled()
+
+    def gptq_wrapper(self, n: common.BaseNode, layer: Layer) -> Union[qi.KerasQuantizationWrapper, Layer]:
+        """
+        A function which takes a computational graph node and a keras layer and perform the quantization wrapping.
+
+        Args:
+            n: A node of mct graph.
+            layer: A keras layer
+
+        Returns: Wrapped layer if the layer should be wrap, otherwise returns the layer as is.
+
+        """
+        if self._is_gptq_applicable(n):
+            weights_quantizers, activation_quantizers = quantization_builder(n, self.gptq_config)
+            return qi.KerasQuantizationWrapper(layer,
+                                               weights_quantizers=weights_quantizers,
+                                               activation_quantizers=activation_quantizers)
+        else:
+            return layer
+
+    def build_gptq_model(self) -> Tuple[Model, UserInformation]:
         """
         Build the GPTQ model with QuantizationWrappers
+
         Returns:
             Quantized graph for GPTQ fine-tuning, GPTQ graph user info
         """

-
-
-
-
-
+        gptq_model, gptq_user_info = KerasModelBuilder(graph=self.graph_quant,
+                                                       append2output=self.compare_points,
+                                                       fw_info=self.fw_info,
+                                                       return_float_outputs=True,
+                                                       wrapper=self.gptq_wrapper).build_model()
+
+        return gptq_model, gptq_user_info

     def compute_gradients(self, in_y_float: List[tf.Tensor], input_data: List[np.ndarray],
                           in_optimizer_with_param: List,

@@ -149,18 +194,9 @@ class KerasGPTQTrainer(GPTQTrainer):
                                          self.compare_points_std,
                                          self.weights_for_average_loss)

-
-            gumbel_prob = get_gumbel_probability(self.fxp_model)
-            gumbel_reg = 0
-            for p in gumbel_prob:
-                entropy = -tf.reduce_mean(
-                    tf.reduce_sum(p * tf.math.log(tf.maximum(p,
-                                                             self.gptq_config.eps)),
-                                  axis=0))
+            reg_value = self.reg_func(self.fxp_model, self.gptq_config.regularization_factor)

-
-            gumbel_reg /= len(gumbel_prob)
-            loss_value += self.gptq_config.quantizer_config.gumbel_entropy_regularization * gumbel_reg
+            loss_value += reg_value

             # Use the gradient tape to automatically retrieve
             # the gradients of the trainable variables with respect to the loss.

@@ -179,9 +215,6 @@ class KerasGPTQTrainer(GPTQTrainer):
            representative_data_gen: Dataset to use for inputs of the models.
         """
         compute_gradients = self.compute_gradients
-        if self.gptq_config.sam_optimization:
-            sam = SAM(self.fxp_model, self.compute_gradients, self.optimizer_with_param, self.gptq_config.rho)
-            compute_gradients = sam.compute_gradients

         # ----------------------------------------------
         # Training loop

@@ -237,7 +270,8 @@ class KerasGPTQTrainer(GPTQTrainer):
         for data in tqdm(data_function()):
             input_data = [d * self.input_scale for d in data]

-            loss_value_step, grads = self.nano_training_step(input_data, in_compute_gradients,
+            loss_value_step, grads = self.nano_training_step(input_data, in_compute_gradients,
+                                                             in_optimizer_with_param, is_training)
             # Run one step of gradient descent by updating
             # the value of the variables to minimize the loss.
             for i, (o, p) in enumerate(in_optimizer_with_param):

@@ -258,16 +292,17 @@ class KerasGPTQTrainer(GPTQTrainer):
         graph = copy.copy(self.graph_quant)

         for layer in self.fxp_model.layers:
-            if isinstance(layer,
-                    layer.quantize_config, WeightQuantizeConfig):
+            if isinstance(layer, KerasQuantizationWrapper):
                 node = graph.find_node_by_name(layer.layer.name)
                 if len(node) == 0 and isinstance(layer.layer, TensorFlowOpLayer):
                     node = graph.find_node_by_name('_'.join(layer.layer.name.split('_')[3:]))
                 if len(node) != 1:
                     common.Logger.error(f"Can't update GPTQ graph due to missing layer named: {layer.layer.name}")
                 node = node[0]
+                kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
+                                                                      fw_info=self.fw_info)
                 weights, weight_quant_config, activation_quant_config = \
-                    layer.
+                    layer.weights_quantizers[kernel_attribute].update_layer_quantization_params(layer)
                 for weight_attr, weight in weights.items():
                     node.set_weights_by_keys(weight_attr, weight.numpy())
                 for config_attr, config_value in weight_quant_config.items():

@@ -281,4 +316,3 @@ class KerasGPTQTrainer(GPTQTrainer):
             node.set_weights_by_keys(BIAS, new_bias)

         return graph
-
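The compute_gradients hunk above swaps the hard-coded Gumbel entropy penalty for a pluggable regularizer: get_regularization(...) is stored as self.reg_func and invoked once per step as self.reg_func(self.fxp_model, self.gptq_config.regularization_factor). A hedged sketch of the contract implied by that call site; the body is a stand-in, not MCT's actual soft-rounding regularizer:

    import tensorflow as tf

    def example_reg_func(fxp_model: tf.keras.Model, regularization_factor: float) -> tf.Tensor:
        # Must return a scalar tensor; the trainer adds it to the GPTQ loss
        # via `loss_value += reg_value`.
        penalty = tf.constant(0.0)  # placeholder penalty over the wrapped model
        return regularization_factor * penalty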
model_compression_toolkit/gptq/keras/graph_info.py +24 -43

@@ -13,22 +13,21 @@
 # limitations under the License.
 # ==============================================================================

-
 import tensorflow as tf
-from tensorflow_model_optimization.python.core.quantization.keras.quantize_wrapper import QuantizeWrapper
 from typing import Tuple, List
-
 from model_compression_toolkit.core.keras.constants import USE_BIAS
-from model_compression_toolkit.gptq.keras.quantizer import WeightQuantizeConfig
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from tensorflow.keras.models import Model
+from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
+from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
+from model_compression_toolkit.quantizers_infrastructure import KerasQuantizationWrapper
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup


-def
-
-
-
-        List[tf.Variable], List[tf.Variable], List[tf.Variable], List[tf.Variable], List[tf.Variable]):
+def get_gptq_trainable_parameters(fxp_model: Model,
+                                  fw_info: FrameworkInfo,
+                                  add_bias: bool = False) -> (
+        List[tf.Variable], List[tf.Variable], List[tf.Variable]):
     """
     Get trainable parameters from all layers in a model

@@ -36,7 +35,6 @@ def get_trainable_parameters(fxp_model: Model,
         fxp_model: Model to get its trainable parameters.
         fw_info: Framework information needed for keras kernel ops list.
         add_bias: Whether to include biases of the model (if there are) or not.
-        is_gumbel: Whether the fxp model is quantized using Gumbel Rounding

     Returns:
         A list of trainable variables in a model. Each item is a list of a layers weights.

@@ -45,15 +43,17 @@
     trainable_weights: List[tf.Tensor] = []
     trainable_threshold: List[tf.Tensor] = []
     bias_weights: List[List[tf.Tensor]] = []
-
+
     for layer in fxp_model.layers:
-        if isinstance(layer,
-
-
-
-
-
-
+        if isinstance(layer, KerasQuantizationWrapper):
+            kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                  fw_info=DEFAULT_KERAS_INFO)
+
+            # collect trainable weights per quantizer
+            quantizer_trainable_weights = layer.weights_quantizers[kernel_attribute].get_trainable_variables(VariableGroup.WEIGHTS)
+            quantizer_trainable_threshold = layer.weights_quantizers[kernel_attribute].get_trainable_variables(VariableGroup.QPARAMS)
+            trainable_weights.append(quantizer_trainable_weights)
+            trainable_threshold.extend(quantizer_trainable_threshold)

             if add_bias:
                 kernel_ops_attrs = fw_info.kernel_ops_attributes_mapping.get(type(layer.layer))

@@ -61,27 +61,8 @@ def get_trainable_parameters(fxp_model: Model,
                     and layer.layer.get_config().get(USE_BIAS)
                 if use_bias is not None and use_bias:
                     bias_weights.append([layer.layer.bias])
-        trainable_weights.append(layer_trainable_weights)
-        trainable_threshold.extend(layer_trainable_threshold)

-    return trainable_weights, bias_weights, trainable_threshold
-
-
-def get_gumbel_probability(fxp_model: Model) -> List[tf.Tensor]:
-    """
-    This function return the gumbel softmax probability of GumRounding
-    Args:
-        fxp_model: A model to be quantized with GumRounding
-
-    Returns: A list of tensors.
-
-    """
-    gumbel_prob_aux: List[tf.Tensor] = []
-    for layer in fxp_model.layers:
-        if isinstance(layer, QuantizeWrapper) and isinstance(
-                layer.quantize_config, WeightQuantizeConfig):
-            gumbel_prob_aux.append(layer.quantize_config.get_gumbel_probability())
-    return gumbel_prob_aux
+    return trainable_weights, bias_weights, trainable_threshold


 def get_weights_for_loss(fxp_model: Model) -> Tuple[List[list], List[list]]:

@@ -99,14 +80,14 @@ def get_weights_for_loss(fxp_model: Model) -> Tuple[List[list], List[list]]:
     flp_weights_list = []
     fxp_weights_list = []
     for layer in fxp_model.layers:
-        if isinstance(layer,
-                layer.quantize_config, WeightQuantizeConfig):
+        if isinstance(layer, KerasQuantizationWrapper):

             # collect pairs of float and quantized weights per layer
             _layer_flp_weights, _layer_fxp_weights = [], []
-            for weight,
-                _layer_flp_weights.append(
-                _layer_fxp_weights.append(quantizer(
+            for weight, quantizer_vars, quantizer in layer.get_weights_vars():
+                _layer_flp_weights.append(quantizer_vars)
+                _layer_fxp_weights.append(quantizer(training=False, inputs=quantizer_vars))
+
             flp_weights_list.append(_layer_flp_weights)
             fxp_weights_list.append(_layer_fxp_weights)
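With the QuantizeWrapper/WeightQuantizeConfig pair gone, get_gptq_trainable_parameters now walks KerasQuantizationWrapper layers and pulls variables out of each kernel quantizer by VariableGroup, returning three lists instead of five. A usage sketch, assuming fxp_model is the wrapped model produced by build_gptq_model in the previous file:

    from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
    from model_compression_toolkit.gptq.keras.graph_info import get_gptq_trainable_parameters

    # Per-layer kernel variables, per-layer bias variables, and a flat list of
    # quantization-parameter (threshold) variables.
    trainable_weights, bias_weights, trainable_threshold = get_gptq_trainable_parameters(
        fxp_model, DEFAULT_KERAS_INFO, add_bias=True)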
model_compression_toolkit/gptq/keras/quantization_facade.py +10 -18

@@ -85,26 +85,18 @@ if common.constants.FOUND_TF:

         Create a GradientPTQConfigV2 to run for 5 epochs:

-        >>> gptq_conf = mct.get_keras_gptq_config(n_epochs=5)
+        >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5)

         Other Tensorflow optimizers can be passed:

-        >>> gptq_conf = mct.get_keras_gptq_config(n_epochs=3, optimizer=tf.keras.optimizers.Nadam())
+        >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=3, optimizer=tf.keras.optimizers.Nadam())

         The configuration can be passed to :func:`~model_compression_toolkit.keras_post_training_quantization` in order to quantize a keras model using gptq.

         """
         bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT, momentum=GPTQ_MOMENTUM)
-
-
-            optimizer,
-            optimizer_rest=optimizer_rest,
-            loss=loss,
-            log_function=log_function,
-            train_bias=True,
-            quantization_parameters_learning=True,
-            optimizer_bias=bias_optimizer,
-            optimizer_quantization_parameter=optimizer_quantization_parameter)
+        return GradientPTQConfigV2(n_epochs, optimizer, optimizer_rest=optimizer_rest, loss=loss,
+                                   log_function=log_function, train_bias=True, optimizer_bias=bias_optimizer)


     def keras_gradient_post_training_quantization_experimental(in_model: Model,

@@ -183,11 +175,11 @@ if common.constants.FOUND_TF:

         Create GPTQ config:

-        >>> gptq_config = mct.get_keras_gptq_config(n_epochs=1)
+        >>> gptq_config = mct.gptq.get_keras_gptq_config(n_epochs=1)

         Pass the model with the representative dataset generator to get a quantized model:

-        >>> quantized_model, quantization_info = mct.keras_gradient_post_training_quantization_experimental(model, repr_datagen, gptq_config, target_kpi=kpi, core_config=config)
+        >>> quantized_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization_experimental(model, repr_datagen, gptq_config, target_kpi=kpi, core_config=config)

         """
         KerasModelValidation(model=in_model,

@@ -196,8 +188,8 @@ if common.constants.FOUND_TF:
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
                 common.Logger.error("Given quantization config to mixed-precision facade is not of type "
-                                    "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization
-                                    "or pass a valid mixed precision configuration.")
+                                    "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization "
+                                    "API, or pass a valid mixed precision configuration.")  # pragma: no cover

             common.Logger.info("Using experimental mixed-precision quantization. "
                                "If you encounter an issue please file a bug.")

@@ -243,10 +235,10 @@ else:
     def get_keras_gptq_config(*args, **kwargs):
         Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
                         'when using keras_post_training_quantization_mixed_precision. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover


     def keras_gradient_post_training_quantization_experimental(*args, **kwargs):
         Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
                         'when using keras_gradient_post_training_quantization_experimental. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover
model_compression_toolkit/gptq/keras/quantizer/__init__.py +2 -1

@@ -13,4 +13,5 @@
 # limitations under the License.
 # ==============================================================================

-
+import model_compression_toolkit.gptq.keras.quantizer.ste_rounding.symmetric_ste
+import model_compression_toolkit.gptq.keras.quantizer.soft_rounding.symmetric_soft_quantizer
model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +112 -0 (new file)

@@ -0,0 +1,112 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from abc import abstractmethod
+from typing import Union, Dict, List
+
+from model_compression_toolkit.core.common import Logger
+from model_compression_toolkit.core.common.constants import FOUND_TF
+from model_compression_toolkit.gptq.common.gptq_constants import WEIGHTS_QUANTIZATION_PARAMS
+
+from model_compression_toolkit.quantizers_infrastructure import TrainableQuantizerWeightsConfig, \
+    TrainableQuantizerActivationConfig
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer
+
+if FOUND_TF:
+    import tensorflow as tf
+
+    from model_compression_toolkit.quantizers_infrastructure import BaseKerasTrainableQuantizer, \
+        KerasQuantizationWrapper
+
+    class BaseKerasGPTQTrainableQuantizer(BaseKerasTrainableQuantizer):
+        """
+        A base class for trainable Keras quantizer for GPTQ.
+        """
+
+        def __init__(self,
+                     quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
+            """
+            Initializes BaseKerasGPTQTrainableQuantizer object.
+
+            Args:
+                quantization_config: quantizer config class contains all the information about a quantizer configuration.
+            """
+
+            super().__init__(quantization_config)
+
+
+        def update_layer_quantization_params(self, layer: KerasQuantizationWrapper
+                                             ) -> (Dict[str, tf.Tensor], Dict[str, Dict], Dict):
+            """
+            A Function to calculate the needed change in attributes in NodeQuantizationConfig after retraining.
+
+            Args:
+                layer: A wrapped Keras layer.
+
+            Returns:
+                3 dictionaries describing the change in layer's weights, weights config, activation config
+                that changed during GPTQ retraining.
+                Keys must match NodeQuantizationConfig attributes
+
+            """
+            weights = {}
+            for weight, quantizer_vars, quantizer in layer.get_weights_vars():
+                if not isinstance(quantizer, BaseTrainableQuantizer):
+                    Logger.error(f"Expecting a GPTQ trainable quantizer, "  # pragma: no cover
+                                 f"but got {type(quantizer)} which is not callable.")
+                weights.update({weight: quantizer(training=False, inputs=quantizer_vars)})
+
+            quant_config = {WEIGHTS_QUANTIZATION_PARAMS: self.get_quant_config()}
+
+            return weights, quant_config, {}
+
+        def get_aux_variable(self) -> List[tf.Tensor]:
+            """
+            This function return a list with the quantizer's quantization auxiliary variables.
+
+            Returns: A list with the quantization auxiliary variables.
+
+            """
+
+            return []  # pragma: no cover
+
+        def get_quantization_variable(self) -> List[tf.Tensor]:
+            """
+            This function return a list with the quantizer's quantization parameters variables.
+
+            Returns: A list with the quantization parameters.
+
+            """
+
+            return []  # pragma: no cover
+
+        @abstractmethod
+        def get_quant_config(self):
+            """
+            Returns the config used to edit NodeQuantizationConfig after GPTQ retraining.
+
+            Returns:
+                A dictionary of attributes the quantize_config retraining has changed during GPTQ retraining.
+                Keys must match NodeQuantizationConfig attributes.
+
+            """
+            raise NotImplemented(f'{self.__class__.__name__} have to implement the '  # pragma: no cover
+                                 f'quantizer\'s get_quant_config.')
+
+else:
+    class BaseKerasGPTQTrainableQuantizer:  # pragma: no cover
+        def __init__(self, *args, **kwargs):
+            Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
+                            'when using BaseKerasGPTQTrainableQuantizer. '
+                            'Could not find Tensorflow package.')  # pragma: no cover
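Concrete GPTQ quantizers (the ste_rounding and soft_rounding modules imported by quantizer/__init__.py above) subclass this base; get_quant_config is the one abstract hook. A hypothetical skeleton, assuming mct-nightly 1.8.0 with TensorFlow installed; real subclasses in this release also register themselves for lookup by quantization_builder, which this sketch omits:

    from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import \
        BaseKerasGPTQTrainableQuantizer

    class ExampleGPTQQuantizer(BaseKerasGPTQTrainableQuantizer):
        def get_quant_config(self):
            # Report quantization parameters changed during GPTQ retraining;
            # keys must match NodeQuantizationConfig attributes.
            return {}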
model_compression_toolkit/gptq/keras/quantizer/quant_utils.py +13 -14

@@ -26,6 +26,19 @@ def ste_ceil(x: tf.Tensor) -> tf.Tensor:
     return error + x


+def safe_log(x: tf.Tensor, eps: float) -> tf.Tensor:
+    """
+    Computes log function of x unless x is smaller than some small value, so the log function would not fail.
+
+    Args:
+        x: input variable.
+        eps: limit value.
+
+    Returns: log of x where x > eps, else, log of eps.
+    """
+    return tf.math.log(tf.maximum(x, eps))
+
+
 def ste_round(x: tf.Tensor) -> tf.Tensor:
     """
     Return the rounded values of a tensor.

@@ -59,20 +72,6 @@ def calculate_delta(max_tensor: tf.Tensor,
     return max_tensor / (2 ** (num_bits - int(signed)))


-def adjustable_steps(x: tf.Variable, t: float) -> tf.Tensor:
-    """
-    A function to gradually quantize a float variable to an integer of values [-1, 0 ,1]
-    Args:
-        x: input float variable
-        t: temperature to control quantization
-
-    Returns:
-        semi-quantized variable
-
-    """
-    return tf.sigmoid(tf.add(x, 1) / t) + tf.sigmoid(tf.add(x, -1) / t) - 1
-
-
 def ste_clip(x: [tf.Tensor, tf.Variable], max_val=1, min_val=None) -> tf.Tensor:
     """
     clip a variable between fixed values such that min_val<=output<=max_val
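safe_log factors out the clipped-log idiom that the removed Gumbel entropy code inlined as tf.math.log(tf.maximum(p, eps)). A small self-contained check of the helper's behavior:

    import tensorflow as tf

    def safe_log(x: tf.Tensor, eps: float) -> tf.Tensor:
        # Same definition as the helper added above.
        return tf.math.log(tf.maximum(x, eps))

    p = tf.constant([1.0, 0.5, 0.0])
    print(safe_log(p, eps=1e-8).numpy())  # log(0) clips to log(eps) instead of -inf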
model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +78 -0 (new file)

@@ -0,0 +1,78 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Dict, List, Tuple
+
+from model_compression_toolkit.gptq import GradientPTQConfigV2
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
+from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
+    get_inferable_quantizer_kwargs
+from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
+from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
+from model_compression_toolkit.quantizers_infrastructure import QuantizationTarget
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.common.get_quantizers import \
+    get_inferable_quantizer_class
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.keras.quantizers.base_keras_inferable_quantizer import \
+    BaseKerasInferableQuantizer
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.get_quantizer_config import \
+    get_trainable_quantizer_weights_config
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.get_quantizers import \
+    get_trainable_quantizer_class
+
+
+def quantization_builder(n: common.BaseNode,
+                         gptq_config: GradientPTQConfigV2
+                         ) -> Tuple[Dict[str, BaseKerasGPTQTrainableQuantizer], List[BaseKerasInferableQuantizer]]:
+    """
+    Build quantizers for a node according to its quantization configuration and
+    a global NoOpQuantizeConfig object.
+
+    Args:
+        n: Node to build its QuantizeConfig.
+        gptq_config (GradientPTQConfigV2): GradientPTQConfigV2 configuration.
+
+    Returns:
+        A dictionary which maps the weights kernel attribute to a quantizer for GPTQ training.
+        Note that we return a dictionary although there is only a single attribute that is being mapped to a quantizer,
+        to be compatible with the quantization infrastructure template.
+    """
+
+    weights_quantizers = {}
+    if n.is_weights_quantization_enabled():
+        quant_method = n.final_weights_quantization_cfg.weights_quantization_method
+
+        quantizer_class = get_trainable_quantizer_class(quant_target=QuantizationTarget.Weights,
+                                                        quantizer_type=gptq_config.rounding_type,
+                                                        quant_method=quant_method,
+                                                        quantizer_base_class=BaseKerasGPTQTrainableQuantizer)
+        kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=n.type,
+                                                              fw_info=DEFAULT_KERAS_INFO)
+
+        weights_quantizers.update({kernel_attribute: quantizer_class(get_trainable_quantizer_weights_config(n),
+                                                                     **gptq_config.gptq_quantizer_params_override)})
+
+    activation_quantizers = []
+    if n.is_activation_quantization_enabled():
+        quant_method = n.final_activation_quantization_cfg.activation_quantization_method
+
+        quantizer_class = get_inferable_quantizer_class(quant_target=QuantizationTarget.Activation,
+                                                        quant_method=quant_method,
+                                                        quantizer_base_class=BaseKerasInferableQuantizer)
+
+        kwargs = get_inferable_quantizer_kwargs(n, QuantizationTarget.Activation)
+
+        activation_quantizers.append(quantizer_class(**kwargs))
+
+    return weights_quantizers, activation_quantizers
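quantization_builder is the per-node factory that gptq_wrapper (gptq_training.py above) calls before wrapping a layer. A hedged usage sketch, where node, layer, and gptq_config are assumed to come from an MCT graph, its matching Keras layer, and a GradientPTQConfigV2:

    from model_compression_toolkit import quantizers_infrastructure as qi
    from model_compression_toolkit.gptq.keras.quantizer.quantization_builder import quantization_builder

    weights_quantizers, activation_quantizers = quantization_builder(node, gptq_config)
    wrapped_layer = qi.KerasQuantizationWrapper(layer,
                                                weights_quantizers=weights_quantizers,
                                                activation_quantizers=activation_quantizers)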