mct-nightly 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/METADATA +16 -16
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/RECORD +193 -150
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/WHEEL +1 -1
- model_compression_toolkit/__init__.py +13 -14
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +1 -1
- model_compression_toolkit/core/common/collectors/base_collector.py +7 -4
- model_compression_toolkit/core/common/collectors/statistics_collector.py +2 -2
- model_compression_toolkit/core/common/constants.py +9 -4
- model_compression_toolkit/core/common/framework_implementation.py +32 -30
- model_compression_toolkit/core/common/graph/base_graph.py +8 -6
- model_compression_toolkit/core/common/logger.py +10 -2
- model_compression_toolkit/core/common/matchers/base_matcher.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +2 -1
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +6 -1
- model_compression_toolkit/core/common/model_validation.py +2 -1
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +7 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +4 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -17
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +9 -2
- model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +5 -4
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +3 -3
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +7 -0
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +13 -8
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +17 -12
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/target_platform/current_tp_model.py +3 -1
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/attribute_filter.py +17 -4
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/operations_to_layers.py +2 -4
- model_compression_toolkit/core/common/target_platform/targetplatform2framework/target_platform_capabilities.py +3 -5
- model_compression_toolkit/core/keras/back2framework/instance_builder.py +12 -21
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +40 -14
- model_compression_toolkit/core/keras/back2framework/model_gradients.py +51 -27
- model_compression_toolkit/core/keras/constants.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +2 -1
- model_compression_toolkit/core/keras/kpi_data_facade.py +2 -2
- model_compression_toolkit/core/keras/quantization_facade.py +3 -3
- model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +15 -9
- model_compression_toolkit/core/keras/quantizer/input_layer_quantize_transform.py +2 -1
- model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py +1 -1
- model_compression_toolkit/core/keras/reader/common.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/instance_builder.py +14 -1
- model_compression_toolkit/core/pytorch/back2framework/model_gradients.py +88 -46
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +27 -12
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/wrapper_quantize_config.py +2 -3
- model_compression_toolkit/core/pytorch/constants.py +5 -0
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +9 -14
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +16 -2
- model_compression_toolkit/core/pytorch/kpi_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/quantization_facade.py +2 -2
- model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py +7 -5
- model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py +1 -1
- model_compression_toolkit/core/tpc_models/get_target_platform_capabilities.py +6 -2
- model_compression_toolkit/{exporter/model_wrapper/keras/quantize_configs → core/tpc_models/imx500_tpc}/__init__.py +1 -1
- model_compression_toolkit/core/tpc_models/imx500_tpc/latest/__init__.py +24 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/target_platform_capabilities.py +45 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/__init__.py +16 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tp_model.py +156 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tpc_keras.py +101 -0
- model_compression_toolkit/core/tpc_models/imx500_tpc/v1/tpc_pytorch.py +95 -0
- model_compression_toolkit/exporter/__init__.py +5 -0
- model_compression_toolkit/exporter/model_exporter/__init__.py +0 -12
- model_compression_toolkit/exporter/model_exporter/fw_agonstic/exporter.py +1 -1
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +12 -39
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +39 -27
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +10 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py +6 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +48 -35
- model_compression_toolkit/exporter/model_exporter/tflite/fakely_quant_tflite_exporter.py +3 -2
- model_compression_toolkit/exporter/model_exporter/tflite/int8_tflite_exporter.py +180 -0
- model_compression_toolkit/exporter/model_exporter/tflite/tflite_export_facade.py +44 -26
- model_compression_toolkit/exporter/model_wrapper/__init__.py +4 -4
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +34 -137
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +143 -0
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizers.py +46 -0
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +56 -22
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +29 -112
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +83 -79
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizers.py +47 -0
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +44 -0
- model_compression_toolkit/gptq/__init__.py +6 -0
- model_compression_toolkit/gptq/common/gptq_config.py +57 -127
- model_compression_toolkit/gptq/common/gptq_constants.py +20 -6
- model_compression_toolkit/gptq/common/gptq_graph.py +22 -0
- model_compression_toolkit/gptq/common/gptq_training.py +32 -26
- model_compression_toolkit/gptq/keras/gptq_loss.py +1 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +73 -39
- model_compression_toolkit/gptq/keras/graph_info.py +24 -43
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -18
- model_compression_toolkit/gptq/keras/quantizer/__init__.py +2 -1
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/quant_utils.py +13 -14
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +78 -0
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/gptq/keras/{optimizers → quantizer/soft_rounding}/__init__.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +112 -0
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +256 -0
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +68 -168
- model_compression_toolkit/gptq/pytorch/gptq_training.py +78 -39
- model_compression_toolkit/gptq/pytorch/graph_info.py +81 -0
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +12 -18
- model_compression_toolkit/gptq/pytorch/quantizer/__init__.py +5 -1
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +92 -0
- model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py +10 -119
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +75 -0
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py +45 -0
- model_compression_toolkit/{exporter/model_wrapper/keras/quantizers → gptq/pytorch/quantizer/soft_rounding}/__init__.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +115 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +244 -0
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +196 -0
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +182 -0
- model_compression_toolkit/ptq/keras/quantization_facade.py +3 -3
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -6
- model_compression_toolkit/qat/common/qat_config.py +68 -0
- model_compression_toolkit/qat/keras/quantization_facade.py +55 -48
- model_compression_toolkit/qat/keras/quantizer/__init__.py +3 -0
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +49 -0
- model_compression_toolkit/qat/keras/quantizer/quant_utils.py +48 -0
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +77 -0
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +283 -0
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +158 -46
- model_compression_toolkit/qat/pytorch/quantization_facade.py +190 -11
- model_compression_toolkit/qat/pytorch/quantizer/__init__.py +17 -0
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +49 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +74 -0
- model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +136 -0
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +204 -0
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +190 -0
- model_compression_toolkit/quantizers_infrastructure/__init__.py +23 -0
- model_compression_toolkit/{gptq/keras/quantizer/configs → quantizers_infrastructure/inferable_infrastructure}/__init__.py +1 -1
- model_compression_toolkit/{gptq/keras/quantizer/gumbel_rounding → quantizers_infrastructure/inferable_infrastructure/common}/__init__.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/base_inferable_quantizer.py +87 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/constants.py +41 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_all_subclasses.py +31 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/get_quantizers.py +53 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/common/quant_utils.py +49 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/__init__.py +14 -0
- model_compression_toolkit/{qunatizers_infrastructure → quantizers_infrastructure/inferable_infrastructure}/keras/load_model.py +26 -8
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantize_wrapper.py +345 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizer_utils.py +85 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/__init__.py +27 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +148 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +65 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +86 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +111 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/base_keras_inferable_quantizer.py +56 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/constants.py +25 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +79 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +179 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +67 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +87 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +163 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/keras/validation_functions.py +66 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantize_wrapper.py +269 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizer_utils.py +152 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/__init__.py +35 -0
- model_compression_toolkit/{exporter/model_wrapper/pytorch/quantizers → quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers}/__init__.py +1 -1
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_lut_pot_inferable_quantizer.py +97 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_pot_inferable_quantizer.py +62 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_symmetric_inferable_quantizer.py +83 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/activation_inferable_quantizers/activation_uniform_inferable_quantizer.py +100 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_lut_symmetric_inferable_quantizer.py +95 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_pytorch_inferable_quantizer.py +48 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_symmetric_inferable_quantizer.py +70 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/base_uniform_inferable_quantizer.py +57 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/constants.py +26 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_pot_inferable_quantizer.py +77 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_lut_symmetric_inferable_quantizer.py +106 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_pot_inferable_quantizer.py +66 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_symmetric_inferable_quantizer.py +104 -0
- model_compression_toolkit/quantizers_infrastructure/inferable_infrastructure/pytorch/quantizers/weights_inferable_quantizers/weights_uniform_inferable_quantizer.py +109 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/base_trainable_quantizer.py +200 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizer_config.py +116 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/get_quantizers.py +65 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/quant_utils.py +36 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/common/trainable_quantizer_config.py +97 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/base_keras_quantizer.py +90 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/config_serialization.py +80 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/keras/quantizer_utils.py +48 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/__init__.py +14 -0
- model_compression_toolkit/quantizers_infrastructure/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +66 -0
- model_compression_toolkit/exporter/model_wrapper/keras/builder/quantize_config_to_node.py +0 -66
- model_compression_toolkit/exporter/model_wrapper/keras/builder/quantizer_to_node.py +0 -134
- model_compression_toolkit/exporter/model_wrapper/keras/extended_quantize_wrapper.py +0 -81
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/activation_quantize_config.py +0 -81
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/weights_activation_quantize_config.py +0 -128
- model_compression_toolkit/exporter/model_wrapper/keras/quantize_configs/weights_quantize_config.py +0 -107
- model_compression_toolkit/exporter/model_wrapper/keras/quantizers/fq_quantizer.py +0 -99
- model_compression_toolkit/exporter/model_wrapper/keras/quantizers/weights_uniform_quantizer.py +0 -105
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantize_config.py +0 -61
- model_compression_toolkit/exporter/model_wrapper/pytorch/quantizers/fq_quantizer.py +0 -59
- model_compression_toolkit/exporter/model_wrapper/pytorch/quantizers/uniform_weights_quantizer.py +0 -67
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/activation_quantize_config.py +0 -52
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/no_quantization_quantize_config.py +0 -46
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/weights_activation_quantize_config.py +0 -54
- model_compression_toolkit/exporter/model_wrapper/pytorch/wrappers_quantize_configs/weights_quantize_config.py +0 -52
- model_compression_toolkit/gptq/keras/gptq_model_builder.py +0 -104
- model_compression_toolkit/gptq/keras/optimizers/sam_optimizer.py +0 -119
- model_compression_toolkit/gptq/keras/quantizer/config_factory.py +0 -62
- model_compression_toolkit/gptq/keras/quantizer/configs/base_quantizer_gptq_config.py +0 -65
- model_compression_toolkit/gptq/keras/quantizer/configs/weight_quantizer_gptq_config.py +0 -269
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/base_gumbel_rounding.py +0 -263
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/gumbel_softmax.py +0 -75
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/symmetric_gumbel.py +0 -266
- model_compression_toolkit/gptq/keras/quantizer/gumbel_rounding/uniform_gumbel.py +0 -247
- model_compression_toolkit/gptq/keras/quantizer/kernel_functions.py +0 -50
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/uniform_ste.py +0 -49
- model_compression_toolkit/gptq/pytorch/gptq_graph_info.py +0 -94
- model_compression_toolkit/gptq/pytorch/gptq_model_builder.py +0 -113
- model_compression_toolkit/gptq/pytorch/quantizer/gptq_quantizer.py +0 -71
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/__init__.py +0 -14
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/base_gumbel_weights_quantizer.py +0 -157
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/sym_gumbel_weights_quantizer.py +0 -150
- model_compression_toolkit/gptq/pytorch/quantizer/gumbel_rounding/uniform_gumbel_weights_quantizer.py +0 -143
- model_compression_toolkit/gptq/pytorch/quantizer/quantizer_wrapper.py +0 -103
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/ste_weights_quantizer.py +0 -103
- model_compression_toolkit/qat/keras/qat_model_builder.py +0 -105
- model_compression_toolkit/qat/keras/quantizer/quantization_dispatcher_builder.py +0 -56
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetirc_ste.py +0 -145
- model_compression_toolkit/qunatizers_infrastructure/__init__.py +0 -8
- model_compression_toolkit/qunatizers_infrastructure/common/__init__.py +0 -14
- model_compression_toolkit/qunatizers_infrastructure/common/base_quantizer.py +0 -123
- model_compression_toolkit/qunatizers_infrastructure/common/node_quantization_dispatcher.py +0 -65
- model_compression_toolkit/qunatizers_infrastructure/keras/__init__.py +0 -14
- model_compression_toolkit/qunatizers_infrastructure/keras/base_keras_quantizer.py +0 -75
- model_compression_toolkit/qunatizers_infrastructure/keras/config_serialization.py +0 -83
- model_compression_toolkit/qunatizers_infrastructure/keras/keras_node_quantization_dispatcher.py +0 -74
- model_compression_toolkit/qunatizers_infrastructure/keras/quantize_wrapper.py +0 -194
- model_compression_toolkit/qunatizers_infrastructure/pytorch/__init__.py +0 -0
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.7.1.31122022.post351.dist-info → mct_nightly-1.8.0.1042023.post423.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/{exporter/model_wrapper/pytorch/wrappers_quantize_configs → qat/pytorch/quantizer/ste_rounding}/__init__.py +0 -0
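The file list already shows the shape of the 1.8.0 release: the misspelled `qunatizers_infrastructure` package is deleted and replaced by `quantizers_infrastructure`, now split into an inferable half (quantizers that run at inference time) and a trainable half (quantizers with learnable parameters for GPTQ/QAT); the Gumbel-rounding GPTQ quantizers are removed in favor of STE and soft-rounding variants; and a new `imx500_tpc` target platform model is added. A minimal import-migration sketch follows; only the module paths are taken from the list above, so treat the exact public symbols as assumptions:

```python
# 1.7.1 shipped the infrastructure under a misspelled package name (removed here):
# from model_compression_toolkit.qunatizers_infrastructure.keras.quantize_wrapper import ...

# 1.8.0 renames it and splits it into inferable/trainable halves:
from model_compression_toolkit import quantizers_infrastructure as qi

# The PyTorch wrapper class referenced by the GPTQ diffs below is exposed at the
# package root:
wrapper_cls = qi.PytorchQuantizationWrapper
```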
model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py

@@ -13,45 +13,24 @@
 # limitations under the License.
 # ==============================================================================

-from typing import Dict, Any
+from typing import Dict, Any

 import numpy as np
 import tensorflow as tf
-
-from
-from model_compression_toolkit
-from model_compression_toolkit.
+
+from model_compression_toolkit.gptq import RoundingType
+from model_compression_toolkit import quantizers_infrastructure as qi
+from model_compression_toolkit.core.common.target_platform import QuantizationMethod
+from model_compression_toolkit.gptq.common.gptq_constants import AUXVAR, PTQ_THRESHOLD
+from model_compression_toolkit.gptq.keras.quantizer import quant_utils as qutils
 from model_compression_toolkit.core.common.constants import THRESHOLD
 from model_compression_toolkit.core.common.defaultdict import DefaultDict
-from model_compression_toolkit.gptq.keras.quantizer.
-from model_compression_toolkit.
-
-
-
-
-                        num_bits: int,
-                        signed: bool,
-                        power_of_two: bool = False) -> tf.Tensor:
-    """
-    Quantize a tensor symmetrically.
-    Args:
-        input_tensor: Tensor to quantize. values of this tensor are not changed during gptq.
-        max_tensor: Tensor with max values to compute the threshold.
-        num_bits: Num of bits to use.
-        signed: Signedness of the quantization range.
-        power_of_two: Whether the threshold should be constrained or not.
-
-    Returns:
-        A quantized tensor.
-    """
-
-    if power_of_two:
-        max_tensor = qutils.power_of_two_max(max_tensor)
-    delta = qutils.calculate_delta(max_tensor, num_bits, signed)
-    tensor_q = qutils.ste_round(input_tensor / delta)
-    min_int = -int(signed) * (2 ** (num_bits - int(signed)))
-    max_int = (2 ** (num_bits - int(signed))) - 1
-    return delta * qutils.ste_clip(tensor_q, max_val=max_int, min_val=min_int)
+from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
+from model_compression_toolkit.quantizers_infrastructure import TrainableQuantizerWeightsConfig
+from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.common.base_inferable_quantizer import mark_quantizer
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.quant_utils import \
+    get_threshold_reshape_shape
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup


 def pertubation_symmetric_quantizer(input_tensor: tf.Tensor,

@@ -63,6 +42,7 @@ def pertubation_symmetric_quantizer(input_tensor: tf.Tensor,
                                     max_lsbs_change: int = 1) -> tf.Tensor:
     """
     Quantize a tensor symmetrically with maximum LSBs shift.
+
     Args:
         input_tensor: Tensor to quantize. values of this tensor are not changed during gptq.
         auxvar_tensor: Tensor that manifests the bit shift the weight due to gptq

@@ -87,195 +67,115 @@ def pertubation_symmetric_quantizer(input_tensor: tf.Tensor,
     return delta * qutils.ste_clip(tensor_q, max_val=max_int, min_val=min_int)


-
+@mark_quantizer(quantization_target=qi.QuantizationTarget.Weights,
+                quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
+                quantizer_type=RoundingType.STE)
+class STEWeightGPTQQuantizer(BaseKerasGPTQTrainableQuantizer):
     """
-    Trainable
+    Trainable symmetric quantizer to quantize a layer weights.
     """

     def __init__(self,
-
-                 per_axis: bool,
-                 signed: bool,
-                 threshold_values: np.ndarray,
-                 quantization_axis: int = -1,
-                 power_of_two: bool = True,
+                 quantization_config: TrainableQuantizerWeightsConfig,
                  max_lsbs_change_map: dict = DefaultDict({}, lambda: 1)):
         """
-        Initialize a
-        for the quantization.
+        Initialize a STEWeightGPTQQuantizer object with parameters to use for the quantization.

         Args:
-
-            per_axis: Whether to quantize per-channel or per-tensor.
-            signed: Signedness to use for the quantization range.
-            threshold_values: Threshold to use for the quantization.
-            quantization_axis: Axis of tensor to use for the quantization.
-            power_of_two: Whether the threshold should be constrained or not.
+            quantization_config: Trainable weights quantizer config.
             max_lsbs_change_map: a mapping between number of bits to max lsb change.
         """
-
-        self.
-        self.
+        super().__init__(quantization_config)
+        self.num_bits = quantization_config.weights_n_bits
+        self.per_channel = quantization_config.weights_per_channel_threshold
+
+        threshold_values = quantization_config.weights_quantization_params[THRESHOLD]
         self.threshold_shape = np.asarray(threshold_values).shape
-        self.threshold_values = np.reshape(np.asarray(threshold_values), [-1]) if self.
+        self.threshold_values = np.reshape(np.asarray(threshold_values), [-1]) if self.per_channel else float(
            threshold_values)
-        self.quantization_axis = quantization_axis
-        self.power_of_two = power_of_two
-        self.max_lsbs_change = max_lsbs_change_map.get(num_bits)
-        self.quantizer_parameters = {}

-
-
-
-
+        self.quantization_axis = quantization_config.weights_channels_axis
+        self.power_of_two = quantization_config.weights_quantization_method == QuantizationMethod.POWER_OF_TWO
+        self.max_lsbs_change = max_lsbs_change_map.get(self.num_bits)
+
+    def initialize_quantization(self,
+                                tensor_shape: Any,
+                                name: str,
+                                layer: Any):
         """
-        Add
-        Args:
-            tensor_shape: Tensor shape the quantizer quantize.
-            name: Prefix of variables names.
-            layer: Layer to add the variables to. The variables are saved
-            in the layer's scope.
+        Add quantizer parameters to the quantizer parameters dictionary

-
-
+        Args:
+            tensor_shape: tensor shape of the quantized tensor.
+            name: Tensor name.
+            layer: Layer to quantize.
         """
-        w_shape = get_kernel(layer.weights).shape
-        ar_iter = layer.add_weight(
-            name + gptq_constants.GPTQ_ITER,
-            shape=(),
-            initializer=tf.keras.initializers.Constant(0.0),
-            trainable=False)

         ptq_threshold_tensor = layer.add_weight(
-            name
-            shape=len(self.threshold_values) if self.
+            f"{name}_{PTQ_THRESHOLD}",
+            shape=len(self.threshold_values) if self.per_channel else (),
             initializer=tf.keras.initializers.Constant(1.0),
             trainable=False)
         ptq_threshold_tensor.assign(self.threshold_values)

+        w = getattr(layer.layer, name)
         auxvar_tensor = layer.add_weight(
-            name
-            shape=
+            f"{name}_{AUXVAR}",
+            shape=list(w.shape),
             initializer=tf.keras.initializers.Constant(0.0),
             trainable=True)

         # save the quantizer added parameters for later calculations
-        self.
-
-            gptq_constants.GPTQ_ITER: ar_iter}
-        return self.quantizer_parameters
+        self.add_quantizer_variable(PTQ_THRESHOLD, ptq_threshold_tensor, VariableGroup.QPARAMS)
+        self.add_quantizer_variable(AUXVAR, auxvar_tensor, VariableGroup.WEIGHTS)

-    def __call__(self,
-
-
-                 **kwargs: Dict[str, Any]):
+    def __call__(self,
+                 inputs: tf.Tensor,
+                 training: bool):
         """
         Quantize a tensor.
+
         Args:
             inputs: Input tensor to quantize.
             training: Whether the graph is in training mode.
-            weights: Dictionary of weights the quantizer can use to quantize the tensor.
-            **kwargs: Additional variables the quantizer may receive.

         Returns:
             The quantized tensor.
         """

-        auxvar =
-        ptq_threshold_tensor =
+        auxvar = self.get_quantizer_variable(AUXVAR)
+        ptq_threshold_tensor = self.get_quantizer_variable(PTQ_THRESHOLD)

-        if self.
-
-
-
-            self.quantization_axis
-            reshape_shape = [-1 if i == quantization_axis else 1 for i in range(n_axis)]
+        if self.per_channel:
+            reshape_shape = get_threshold_reshape_shape(inputs.shape,
+                                                        quant_axis=self.quantization_axis,
+                                                        quant_axis_dim=-1)
             ptq_threshold_tensor = tf.reshape(ptq_threshold_tensor, reshape_shape)
-            q_tensor = pertubation_symmetric_quantizer(inputs,
+            q_tensor = pertubation_symmetric_quantizer(inputs,
+                                                       auxvar,
                                                        ptq_threshold_tensor,
                                                        self.num_bits,
-
-                                                       self.power_of_two,
+                                                       signed=True,
+                                                       power_of_two=self.power_of_two,
                                                        max_lsbs_change=self.max_lsbs_change)
             return q_tensor
         else:
-            return pertubation_symmetric_quantizer(inputs,
+            return pertubation_symmetric_quantizer(inputs,
+                                                   auxvar,
                                                    ptq_threshold_tensor,
                                                    self.num_bits,
-
-                                                   self.power_of_two)
-
-    def get_aux_variable(self) -> tf.Tensor:
-        return self.quantizer_parameters[gptq_constants.AUXVAR]
-
-    def get_config(self) -> Dict[str, Any]:
-        """
-        Returns: Configuration of TrainableQuantizer.
-        """
+                                                   signed=True,
+                                                   power_of_two=self.power_of_two)

-        return {
-            'num_bits': self.num_bits,
-            'per_axis': self.per_axis,
-            'symmetric': self.symmetric,
-            'power_of_two': self.power_of_two
-        }

-    def get_quant_config(self
+    def get_quant_config(self) -> Dict[str, np.ndarray]:
         """
         Returns the config used to edit NodeQuantizationConfig after GPTQ retraining

-        Args:
-            layer: quantized layer
-
         Returns:
             A dictionary of attributes the quantize_config retraining has changed during GPTQ retraining.
             Keys must match NodeQuantizationConfig attributes

         """
-        old_threshold = self.
+        old_threshold = self.get_quantizer_variable(PTQ_THRESHOLD)
         return {THRESHOLD: old_threshold.numpy().reshape(self.threshold_shape)}
-
-    def get_trainable_parameters(self):
-        """
-        A function to get a list trainable of trainable parameters of the quantizer for GPTQ retraining
-
-        Returns:
-            A list of trainable Tensors
-
-        """
-        return [t for t in self.quantizer_parameters.values() if t.trainable]
-
-    def get_quantization_variable(self) -> List[tf.Tensor]:
-        """
-        This function return a list of quantizer parameters.
-        Returns: A list of the quantizer parameters
-
-        """
-        return [self.quantizer_parameters[gptq_constants.THRESHOLD_TENSOR]]
-
-    def __eq__(self, other: Any) -> bool:
-        """
-        Check if equals to another object.
-        Args:
-            other: Other object to compare.
-
-        Returns:
-            Whether they are equal or not.
-        """
-        if not isinstance(other, STEWeightQuantizer):
-            return False
-
-        return (self.num_bits == other.num_bits and
-                self.per_axis == other.per_axis and
-                self.symmetric == other.symmetric)
-
-    def __ne__(self, other: Any) -> bool:
-        """
-        Check if not equals to another object.
-        Args:
-            other: Other object to compare.
-
-        Returns:
-            Whether they are differ or not.
-        """
-        return not self.__eq__(other)
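The hunks above replace the quantizer's hand-rolled state (the `quantizer_parameters` dict, `get_config`, `__eq__`/`__ne__`) with the new trainable-quantizer infrastructure: settings arrive in a `TrainableQuantizerWeightsConfig`, the class is registered through the `mark_quantizer` decorator, and per-layer variables are registered and fetched via `add_quantizer_variable`/`get_quantizer_variable` under a `VariableGroup`. A minimal sketch of that pattern, assuming MCT 1.8.0 is importable; the class name and the placeholder clipping body are ours, not MCT code:

```python
import tensorflow as tf
from model_compression_toolkit import quantizers_infrastructure as qi
from model_compression_toolkit.gptq import RoundingType
from model_compression_toolkit.core.common.target_platform import QuantizationMethod
from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
from model_compression_toolkit.quantizers_infrastructure import TrainableQuantizerWeightsConfig
from model_compression_toolkit.quantizers_infrastructure.inferable_infrastructure.common.base_inferable_quantizer import mark_quantizer
from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup


@mark_quantizer(quantization_target=qi.QuantizationTarget.Weights,
                quantization_method=[QuantizationMethod.SYMMETRIC],
                quantizer_type=RoundingType.STE)
class MyWeightGPTQQuantizer(BaseKerasGPTQTrainableQuantizer):  # hypothetical name
    def __init__(self, quantization_config: TrainableQuantizerWeightsConfig):
        super().__init__(quantization_config)
        self.num_bits = quantization_config.weights_n_bits

    def initialize_quantization(self, tensor_shape, name, layer):
        # Variables are created on the wrapping layer and registered under a
        # group: QPARAMS for quantization parameters, WEIGHTS for aux weights.
        t = layer.add_weight(f"{name}_threshold", shape=(),
                             initializer=tf.keras.initializers.Constant(1.0),
                             trainable=False)
        self.add_quantizer_variable("threshold", t, VariableGroup.QPARAMS)

    def __call__(self, inputs: tf.Tensor, training: bool):
        threshold = self.get_quantizer_variable("threshold")
        # Placeholder body; the real STE quantizer rounds and clips with
        # straight-through gradients (see pertubation_symmetric_quantizer above).
        return tf.clip_by_value(inputs, -threshold, threshold)
```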
model_compression_toolkit/gptq/pytorch/gptq_training.py

@@ -12,24 +12,29 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Callable, List, Tuple
+from typing import Callable, List, Tuple, Union

 import numpy as np
+from torch.nn import Module
 from tqdm import tqdm
 import copy
 import torch
 from model_compression_toolkit.core.common.logger import Logger
+from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
+from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
 from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
-from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.pytorch.constants import BIAS
-from model_compression_toolkit.gptq.pytorch.gptq_model_builder import GPTQPytorchModelBuilder
+from model_compression_toolkit.core.pytorch.constants import BIAS
 from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, set_model, torch_tensor_to_numpy
-from model_compression_toolkit.gptq.pytorch.
-
-from model_compression_toolkit.gptq.pytorch.
+from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable_parameters, \
+    get_weights_for_loss
+from model_compression_toolkit.gptq.pytorch.quantizer.quantization_builder import quantization_builder
+from model_compression_toolkit import quantizers_infrastructure as qi
+from model_compression_toolkit.gptq.pytorch.quantizer.regularization_factory import get_regularization
+from model_compression_toolkit.quantizers_infrastructure import PytorchQuantizationWrapper


 class PytorchGPTQTrainer(GPTQTrainer):

@@ -66,11 +71,9 @@ class PytorchGPTQTrainer(GPTQTrainer):
         else:
             self.input_scale = self.gptq_user_info.input_scale

-        trainable_weights, trainable_bias, trainable_threshold
+        trainable_weights, trainable_bias, trainable_threshold = get_gptq_trainable_parameters(
             self.fxp_model,
-            add_bias=self.gptq_config.train_bias
-            quantization_parameters_learning=self.gptq_config.quantization_parameters_learning,
-            is_gumbel=self.gptq_config.is_gumbel)
+            add_bias=self.gptq_config.train_bias)

         self.flp_weights_list, self.fxp_weights_list = get_weights_for_loss(self.fxp_model)
         if not (len(self.compare_points) == len(trainable_weights) == len(self.flp_weights_list) == len(

@@ -81,10 +84,45 @@ class PytorchGPTQTrainer(GPTQTrainer):

         self.optimizer_with_param = self.get_optimizer_with_param(trainable_weights,
                                                                   trainable_bias,
-                                                                  trainable_threshold
-                                                                  trainable_temperature)
+                                                                  trainable_threshold)

-        self.weights_for_average_loss = to_torch_tensor(self.
+        self.weights_for_average_loss = to_torch_tensor(self.compute_hessian_based_weights(representative_data_gen))
+
+        self.reg_func = get_regularization(self.gptq_config, representative_data_gen)
+
+    def _is_gptq_applicable(self,
+                            node: BaseNode) -> bool:
+        """
+        A function for deciding if a layer should be fine-tuned during GPTQ.
+        Args:
+            node (BaseNode): Node for quantization decision
+        Returns:
+            A boolean whether the layer is to be wrapped with a Quantization Wrapper.
+        """
+
+        if node.is_weights_quantization_enabled() and not self.fw_info.is_kernel_op(node.type):
+            Logger.error(f"GPTQ Error: Quantizing node {node.name} of type {node.type} "
+                         f"without a kernel isn't supported.")
+        return node.is_weights_quantization_enabled()
+
+    def gptq_wrapper(self, n: BaseNode, layer: Module) -> Union[qi.PytorchQuantizationWrapper, Module]:
+        """
+        A function which takes a computational graph node and a pytorch layer and perform the quantization wrapping.
+
+        Args:
+            n: A node of mct graph.
+            layer: A pytorch layer
+
+        Returns: Wrapped layer if the layer should be wrap, otherwise returns the layer as is.
+        """
+
+        if self._is_gptq_applicable(n):
+            weights_quantizers, activation_quantizers = quantization_builder(n, self.gptq_config)
+            return qi.PytorchQuantizationWrapper(layer,
+                                                 weights_quantizers=weights_quantizers,
+                                                 activation_quantizers=activation_quantizers)
+        else:
+            return layer

     def build_gptq_model(self):
         """

@@ -92,10 +130,13 @@ class PytorchGPTQTrainer(GPTQTrainer):
         Returns:
             Quantized graph for GPTQ fine-tuning, GPTQ graph user info
         """
-
-
-
-
+        gptq_model, gptq_user_info = PyTorchModelBuilder(graph=self.graph_quant,
+                                                         append2output=self.compare_points,
+                                                         fw_info=self.fw_info,
+                                                         wrapper=self.gptq_wrapper,
+                                                         return_float_outputs=True).build_model()
+
+        return gptq_model, gptq_user_info

     def train(self, representative_data_gen: Callable):
         """

@@ -145,14 +186,9 @@ class PytorchGPTQTrainer(GPTQTrainer):
                                          self.compare_points_std,
                                          self.weights_for_average_loss)

-
-
-
-            for p in gumbel_prob:
-                entropy = -torch.mean(torch.sum(p * torch.log(torch.maximum(p, self.gptq_config.eps*torch.ones_like(p))),dim=0))
-                gumbel_reg += entropy
-            gumbel_reg = 0 if gumbel_reg == 0 else gumbel_reg/len(gumbel_prob)
-            loss_value += self.gptq_config.quantizer_config.gumbel_entropy_regularization * gumbel_reg
+            reg_value = self.reg_func(self.fxp_model, self.gptq_config.regularization_factor)
+
+            loss_value += reg_value

             # Back-pass
             loss_value.backward()

@@ -202,20 +238,23 @@ class PytorchGPTQTrainer(GPTQTrainer):

         # Update graph after training
         for name, layer in self.fxp_model.named_modules():
-            if isinstance(layer,
+            if isinstance(layer, PytorchQuantizationWrapper):
                 node = self.graph_quant.find_node_by_name(name)
                 if len(node) != 1:
                     Logger.error(f"Can't update GPTQ graph due to missing layer named: {name}")
                 node = node[0]
-
-
-
-
-
-
-
-                node.
-
+                kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
+                                                                      fw_info=self.fw_info)
+                weights, weight_quant_config, activation_quant_config = \
+                    layer.weights_quantizers[kernel_attribute].update_layer_quantization_params(layer)
+                for weight_attr, weight in weights.items():
+                    node.set_weights_by_keys(weight_attr, self.fw_impl.to_numpy(weight))
+                for config_attr, config_value in weight_quant_config.items():
+                    node.final_weights_quantization_cfg.set_quant_config_attr(config_attr, config_value)
+                for config_attr, config_value in activation_quant_config.items():
+                    node.final_activation_quantization_cfg.set_quant_config_attr(config_attr, config_value)
+                if self.gptq_config.train_bias and hasattr(layer.layer, BIAS):
+                    node.set_weights_by_keys(BIAS, self.fw_impl.to_numpy(getattr(layer.layer, BIAS)))

         return graph_quant


@@ -229,7 +268,7 @@ class PytorchGPTQTrainer(GPTQTrainer):

         # Fxp model: unfreeze bias trainable parameters
         for layer in self.fxp_model.modules():
-            if isinstance(layer,
-                if hasattr(layer.
-                    bias = getattr(layer.
+            if isinstance(layer, PytorchQuantizationWrapper):
+                if hasattr(layer.layer, BIAS):
+                    bias = getattr(layer.layer, BIAS)
                     bias.requires_grad = self.gptq_config.train_bias
|
|
|
1
|
+
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
import torch
|
|
16
|
+
import torch.nn as nn
|
|
17
|
+
from typing import List
|
|
18
|
+
from model_compression_toolkit.core.pytorch.constants import BIAS
|
|
19
|
+
from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
|
|
20
|
+
from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
|
|
21
|
+
from model_compression_toolkit.quantizers_infrastructure import PytorchQuantizationWrapper
|
|
22
|
+
from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_gptq_trainable_parameters(fxp_model: nn.Module,
|
|
26
|
+
add_bias: bool = False,
|
|
27
|
+
) -> (List[nn.Parameter], List[nn.Parameter], List[nn.Parameter]):
|
|
28
|
+
"""
|
|
29
|
+
Get trainable parameters from all layers in a model
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
fxp_model: Model to get its trainable parameters.
|
|
33
|
+
add_bias: Whether to include biases of the model (if there are) or not.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
A list of trainable variables in a model. Each item is a list of a layers weights.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
trainable_aux_weights = nn.ParameterList()
|
|
40
|
+
trainable_threshold = nn.ParameterList()
|
|
41
|
+
trainable_bias = nn.ParameterList()
|
|
42
|
+
|
|
43
|
+
for layer in fxp_model.modules():
|
|
44
|
+
if isinstance(layer, PytorchQuantizationWrapper):
|
|
45
|
+
kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
|
|
46
|
+
fw_info=DEFAULT_PYTORCH_INFO)
|
|
47
|
+
|
|
48
|
+
# collect trainable weights per quantizer
|
|
49
|
+
quantizer_trainable_weights = layer.weights_quantizers[kernel_attribute].get_trainable_variables(VariableGroup.WEIGHTS)
|
|
50
|
+
quantizer_trainable_threshold = layer.weights_quantizers[kernel_attribute].get_trainable_variables(VariableGroup.QPARAMS)
|
|
51
|
+
trainable_aux_weights.extend(quantizer_trainable_weights)
|
|
52
|
+
trainable_threshold.extend(quantizer_trainable_threshold)
|
|
53
|
+
|
|
54
|
+
if add_bias and hasattr(layer.layer, BIAS):
|
|
55
|
+
bias = getattr(layer.layer, BIAS)
|
|
56
|
+
trainable_bias.append(bias)
|
|
57
|
+
|
|
58
|
+
return trainable_aux_weights, trainable_bias, trainable_threshold
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_weights_for_loss(fxp_model: nn.Module) -> [List[nn.Parameter], List[torch.Tensor]]:
|
|
62
|
+
"""
|
|
63
|
+
Get all float and quantized kernels for the GPTQ loss
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
fxp_model: Model to get its float and quantized weights.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
A list of float kernels, each item is the float kernel of the layer
|
|
70
|
+
A list of quantized kernels, each item is the quantized kernel of the layer
|
|
71
|
+
"""
|
|
72
|
+
|
|
73
|
+
flp_weights_list, fxp_weights_list = [], []
|
|
74
|
+
for layer in fxp_model.modules():
|
|
75
|
+
if isinstance(layer, PytorchQuantizationWrapper):
|
|
76
|
+
# Collect pairs of float and quantized weights per layer
|
|
77
|
+
for weight, quantizer_vars, quantizer in layer.get_weights_vars():
|
|
78
|
+
flp_weights_list.append(quantizer_vars)
|
|
79
|
+
fxp_weights_list.append(quantizer(training=False, inputs=quantizer_vars))
|
|
80
|
+
|
|
81
|
+
return flp_weights_list, fxp_weights_list
|
|
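A usage sketch for the two helpers in the new `graph_info.py`: parameter groups for the optimizer from the trainable variables, and a reconstruction loss from the float/quantized kernel pairs. `fxp_model` is assumed to be a wrapped model as produced by `build_gptq_model` above; the learning rate and the plain MSE are illustrative choices, not MCT defaults:

```python
import torch
from model_compression_toolkit.gptq.pytorch.graph_info import (
    get_gptq_trainable_parameters,
    get_weights_for_loss,
)

aux_vars, bias_vars, threshold_vars = get_gptq_trainable_parameters(fxp_model, add_bias=True)
optimizer = torch.optim.Adam([{"params": aux_vars},
                              {"params": threshold_vars}], lr=1e-4)

flp_weights, fxp_weights = get_weights_for_loss(fxp_model)
# One simple per-kernel reconstruction objective:
loss = sum(torch.mean((f - q) ** 2) for f, q in zip(flp_weights, fxp_weights))
```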
model_compression_toolkit/gptq/pytorch/quantization_facade.py

@@ -21,6 +21,7 @@ from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
 from model_compression_toolkit.core.common.target_platform import TargetPlatformCapabilities
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core.runner import core_runner, _init_tensorboard_writer
+from model_compression_toolkit.gptq.keras.quantization_facade import GPTQ_MOMENTUM
 from model_compression_toolkit.gptq.runner import gptq_runner
 from model_compression_toolkit.core.exporter import export_model
 from model_compression_toolkit.core.analyzer import analyzer_model_quantization

@@ -38,7 +39,7 @@ if FOUND_TORCH:
     from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
     from model_compression_toolkit.core.pytorch.constants import DEFAULT_TP_MODEL
     from model_compression_toolkit.gptq.pytorch.gptq_loss import multiple_tensors_mse_loss
-    from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import
+    from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import get_exportable_pytorch_model
     import torch
     from torch.nn import Module
     from torch.optim import Adam, Optimizer

@@ -71,26 +72,19 @@ if FOUND_TORCH:
         Import MCT and Create a GradientPTQConfigV2 to run for 5 epochs:

         >>> import model_compression_toolkit as mct
-        >>> gptq_conf = mct.get_pytorch_gptq_config(n_epochs=5)
+        >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=5)

         Other PyTorch optimizers can be passed with dummy params:

         >>> import torch
-        >>> gptq_conf = mct.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))
+        >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))

         The configuration can be passed to :func:`~model_compression_toolkit.pytorch_post_training_quantization` in order to quantize a pytorch model using gptq.

         """
-        bias_optimizer =
-
-
-            optimizer,
-            optimizer_rest=optimizer_rest,
-            loss=loss,
-            log_function=log_function,
-            train_bias=True,
-            optimizer_quantization_parameter=optimizer_quantization_parameter,
-            optimizer_bias=bias_optimizer)
+        bias_optimizer = torch.optim.SGD([torch.Tensor([])], lr=LR_BIAS_DEFAULT, momentum=GPTQ_MOMENTUM)
+        return GradientPTQConfigV2(n_epochs, optimizer, optimizer_rest=optimizer_rest, loss=loss,
+                                   log_function=log_function, train_bias=True, optimizer_bias=bias_optimizer)


     def pytorch_gradient_post_training_quantization_experimental(model: Module,

@@ -152,15 +146,15 @@ if FOUND_TORCH:
         Pass the module, the representative dataset generator and the configuration (optional) to get a quantized module

-        >>> quantized_module, quantization_info = mct.pytorch_gradient_post_training_quantization_experimental(module, repr_datagen, core_config=config, gptq_config=gptq_conf)
+        >>> quantized_module, quantization_info = mct.gptq.pytorch_gradient_post_training_quantization_experimental(module, repr_datagen, core_config=config, gptq_config=gptq_conf)

         """

         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
                 common.Logger.error("Given quantization config to mixed-precision facade is not of type "
-                                    "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization
-                                    "or pass a valid mixed precision configuration.")
+                                    "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization "
+                                    "API, or pass a valid mixed precision configuration.")  # pragma: no cover

             common.Logger.info("Using experimental mixed-precision quantization. "
                                "If you encounter an issue please file a bug.")

@@ -212,10 +206,10 @@ else:
     def get_pytorch_gptq_config(*args, **kwargs):
         Logger.critical('Installing Pytorch is mandatory '
                         'when using pytorch_gradient_post_training_quantization_experimental. '
-                        'Could not find torch package.')
+                        'Could not find torch package.')  # pragma: no cover


     def pytorch_gradient_post_training_quantization_experimental(*args, **kwargs):
         Logger.critical('Installing Pytorch is mandatory '
                         'when using pytorch_gradient_post_training_quantization_experimental. '
-                        'Could not find the torch package.')
+                        'Could not find the torch package.')  # pragma: no cover
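Taken together, the facade hunks move the public GPTQ entry points under the `mct.gptq` namespace. A minimal end-to-end call assembled from the docstring examples above; `module` and `repr_datagen` stand for a user-supplied `torch.nn.Module` and a representative data generator:

```python
import model_compression_toolkit as mct

gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=5)
quantized_module, quantization_info = \
    mct.gptq.pytorch_gradient_post_training_quantization_experimental(
        module, repr_datagen, gptq_config=gptq_conf)
```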