PyPI - mct-nightly - Versions diffs - 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl - Mend

mct-nightly 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (241)

model_compression_toolkit/ptq/pytorch/quantization_facade.py CHANGED Viewed

@@ -33,7 +33,7 @@ if FOUND_TORCH:
     from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
     from model_compression_toolkit.core.pytorch.constants import DEFAULT_TP_MODEL
     from torch.nn import Module
-    from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import get_fully_quantized_pytorch_model
+    from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import get_exportable_pytorch_model
     from model_compression_toolkit import get_target_platform_capabilities
     DEFAULT_PYTORCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
@@ -62,7 +62,7 @@ if FOUND_TORCH:
             representative_data_gen (Callable): Dataset used for calibration.
             target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
-            target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to. `Default PyTorch TPC <https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/core/tpc_models/pytorch_tp_models/pytorch_default.py>`_
+            target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to.
             new_experimental_exporter (bool): Whether exporting the quantized model using new exporter or not (in progress. Avoiding it for now is recommended).
         Returns:
@@ -95,8 +95,9 @@ if FOUND_TORCH:
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
                 common.Logger.error("Given quantization config to mixed-precision facade is not of type "
-                                    "MixedPrecisionQuantizationConfigV2. Please use pytorch_post_training_quantization API,"
-                                    "or pass a valid mixed precision configuration.")
+                                    "MixedPrecisionQuantizationConfigV2. Please use "
+                                    "pytorch_post_training_quantization API, or pass a valid mixed precision "
+                                    "configuration.")  # pragma: no cover
             common.Logger.info("Using experimental mixed-precision quantization. "
                                "If you encounter an issue please file a bug.")
@@ -127,7 +128,7 @@ if FOUND_TORCH:
             Logger.warning('Using new experimental exported models. '
                            'Please do not use unless you are familiar with what you are doing')
-            return get_fully_quantized_pytorch_model(tg)
+            return get_exportable_pytorch_model(tg)
         quantized_model, user_info = export_model(tg,
                                                   DEFAULT_PYTORCH_INFO,
@@ -143,4 +144,4 @@ else:
     def pytorch_post_training_quantization_experimental(*args, **kwargs):
         Logger.critical('Installing Pytorch is mandatory '
                         'when using pytorch_post_training_quantization_experimental. '
-                        'Could not find the torch package.')
+                        'Could not find the torch package.')  # pragma: no cover

model_compression_toolkit/qat/common/qat_config.py ADDED Viewed

@@ -0,0 +1,68 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Dict
+from enum import Enum
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+def _is_qat_applicable(node: common.BaseNode,
+                       fw_info: FrameworkInfo) -> bool:
+    """
+    A function for deciding if a layer should be fine-tuned during QAT
+    Args:
+        node (BaseNode): Node for quantization decision
+        fw_info (FrameworkInfo): Pytorch quantization information
+    Returns:
+        A boolean whether the layer is to be wrapped with a QuantizeWrapper
+    """
+    if node.is_weights_quantization_enabled() and not fw_info.is_kernel_op(node.type):
+        common.Logger.error("QAT Error: Quantizing a node without a kernel isn't supported")
+    return node.is_weights_quantization_enabled() or node.is_activation_quantization_enabled()
+class TrainingMethod(Enum):
+    """
+    An enum for selecting a QAT training method
+    STE - Standard straight-through estimator. Includes PowerOfTwo, symmetric & uniform quantizers
+    """
+    STE = "STE",
+class QATConfig:
+    """
+    QAT configuration class.
+    """
+    def __init__(self, weight_training_method: TrainingMethod = TrainingMethod.STE,
+                 activation_training_method: TrainingMethod = TrainingMethod.STE,
+                 weight_quantizer_params_override: Dict = None,
+                 activation_quantizer_params_override: Dict = None,
+                 ):
+        """
+        Args:
+            weight_training_method (TrainingMethod): Training method for weight quantizers
+            activation_training_method (TrainingMethod): Training method for activation quantizers:
+            weight_quantizer_params_override: A dictionary of parameters to override in weight quantization quantizer instantiation. Defaults to None (no parameters)
+            activation_quantizer_params_override: A dictionary of parameters to override in activation quantization quantizer instantiation. Defaults to None (no parameters)
+        """
+        self.weight_training_method = weight_training_method
+        self.activation_training_method = activation_training_method
+        self.weight_quantizer_params_override = {} if weight_quantizer_params_override is None else weight_quantizer_params_override
+        self.activation_quantizer_params_override = {} if activation_quantizer_params_override is None else activation_quantizer_params_override

model_compression_toolkit/qat/keras/quantization_facade.py CHANGED Viewed

@@ -14,6 +14,7 @@
 # ==============================================================================
 from typing import Callable
+from functools import partial
 from model_compression_toolkit import CoreConfig
 from model_compression_toolkit.core import common
@@ -29,25 +30,56 @@ from model_compression_toolkit.ptq.runner import ptq_runner
 if FOUND_TF:
     import tensorflow as tf
+    from tensorflow.keras.layers import Layer
+    from tensorflow.keras.models import Model
     from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
     from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
     from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation
-    from tensorflow.keras.models import Model
     from model_compression_toolkit.core.keras.constants import DEFAULT_TP_MODEL
-    from model_compression_toolkit.qat.keras.qat_model_builder import QATKerasModelBuilder
+    from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
+    from model_compression_toolkit import get_target_platform_capabilities
+    from model_compression_toolkit import quantizers_infrastructure as qi
     from model_compression_toolkit import get_target_platform_capabilities
-    from model_compression_toolkit import qunatizers_infrastructure as qi
+    from model_compression_toolkit.core import common
+    from model_compression_toolkit.core.common import BaseNode
+    from model_compression_toolkit.core.common.constants import TENSORFLOW
+    from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+    from model_compression_toolkit.qat.common.qat_config import _is_qat_applicable
+    from model_compression_toolkit.core.keras.constants import DEFAULT_TP_MODEL
+    from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
+    from model_compression_toolkit.qat.keras.quantizer.quantization_builder import quantization_builder
+    from model_compression_toolkit.qat.common.qat_config import QATConfig
+    from model_compression_toolkit import quantizers_infrastructure as qi
     DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)
+    def qat_wrapper(n: common.BaseNode, layer: Layer, qat_config):
+        """
+        A function which takes a computational graph node and a keras layer and perform the quantization wrapping
+        Args:
+            n: A node of mct graph.
+            layer: A keras layer
+        Returns: Wrapped layer
+        """
+        if _is_qat_applicable(n, DEFAULT_KERAS_INFO):
+            weights_quantizers, activation_quantizers = quantization_builder(n, qat_config, DEFAULT_KERAS_INFO)
+            return qi.KerasQuantizationWrapper(layer, weights_quantizers, activation_quantizers)
+        else:
+            return layer
     def keras_quantization_aware_training_init(in_model: Model,
                                                representative_data_gen: Callable,
                                                target_kpi: KPI = None,
                                                core_config: CoreConfig = CoreConfig(),
+                                               qat_config: QATConfig = QATConfig(),
                                                fw_info: FrameworkInfo = DEFAULT_KERAS_INFO,
                                                target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC):
         """
@@ -70,6 +102,7 @@ if FOUND_TF:
              representative_data_gen (Callable): Dataset used for initial calibration.
              target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired.
              core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
+             qat_config (QATConfig): QAT configuration
              fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.).  `Default Keras info <https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/core/keras/default_framework_info.py>`_
              target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to.
@@ -90,14 +123,14 @@ if FOUND_TF:
              >>> from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
              >>> model = MobileNetV2()
-            Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
-            In this example a random dataset of 10 batches each containing 4 images is used.
+             Create a random dataset generator, for required number of calibration iterations (num_calibration_batches):
+             In this example a random dataset of 10 batches each containing 4 images is used.
-            >>> import numpy as np
-            >>> num_calibration_batches = 10
-            >>> def repr_datagen():
-            >>>     for _ in range(num_calibration_batches):
-            >>>         yield [np.random.random((4, 224, 224, 3))]
+             >>> import numpy as np
+             >>> num_calibration_batches = 10
+             >>> def repr_datagen():
+             >>>     for _ in range(num_calibration_batches):
+             >>>         yield [np.random.random((4, 224, 224, 3))]
              Create a MCT core config, containing the quantization configuration:
@@ -154,24 +187,23 @@ if FOUND_TF:
         tg = ptq_runner(tg, representative_data_gen, core_config, fw_info, fw_impl, tb_w)
-        qat_model, user_info = QATKerasModelBuilder(graph=tg, fw_info=fw_info).build_model()
+        _qat_wrapper = partial(qat_wrapper, qat_config=qat_config)
+        qat_model, user_info = KerasModelBuilder(graph=tg, fw_info=fw_info, wrapper=_qat_wrapper).build_model()
         user_info.mixed_precision_cfg = bit_widths_config
         #TODO: remove the last output after updating documentation.
         return qat_model, user_info, {}
-    def keras_quantization_aware_training_finalize(in_model: Model):
+    def keras_quantization_aware_training_finalize(in_model: Model) -> Model:
         """
-         Convert a model fine-tuned by the user to a network without QuantizeWrappers. The exported
-         model contains float (fake-quantized) parameters and fake-quantiztion layers for quantizing
-         the activations
+         Convert a model fine-tuned by the user (Trainable quantizers) to a model with Inferable quantizers.
          Args:
-             in_model (Model): Keras model to remove QuantizeWrappers.
+             in_model (Model): Keras model to replace TrainableQuantizer with InferableQuantizer
          Returns:
-             A quantized model without QuantizeWrappers.
+             A quantized model with Inferable quantizers
          Examples:
@@ -216,37 +248,12 @@ if FOUND_TF:
              >>> quantized_model = mct.keras_quantization_aware_training_finalize(quantized_model)
          """
         def _export(layer):
             if isinstance(layer, qi.KerasQuantizationWrapper):
-                if layer.dispatcher.is_weights_quantization:
-                    new_layer = layer.layer.__class__.from_config(layer.layer.get_config())
-                    with tf.name_scope(new_layer.name):
-                        new_layer.build(layer.input_shape)
-                    weights_list = []
-                    for w in new_layer.weights:
-                        val = None
-                        for qw in layer.weights:
-                            if w.name in qw.name:
-                                attribute_name = w.name.split('/')[-1].split(':')[0]
-                                if attribute_name in layer.dispatcher.weight_quantizers.keys():
-                                    quantizer = layer.dispatcher.weight_quantizers.get(attribute_name)
-                                    val = quantizer(qw, False)
-                                else:
-                                    val = qw
-                                val = val.numpy()
-                        if val is None:
-                            Logger.error(f'Could not match weight name: {w.name}')
-                        weights_list.append(val)
-                    new_layer.set_weights(weights_list)
-                    new_layer.trainable = False
-                    return new_layer
-                else:
-                    Logger.error(f'Undefined quantize_config')
-            else:
-                return layer
-        # clone each layer in the model and apply _export to layers wrapped with a QuantizeWrapper.
+                layer.convert_to_inferable_quantizers()
+            return layer
+        # clone each layer in the model and apply _export to layers with TrainableQuantizeWrappers
         exported_model = tf.keras.models.clone_model(in_model, input_tensors=None, clone_function=_export)
         return exported_model
@@ -257,10 +264,10 @@ else:
     def keras_quantization_aware_training_init(*args, **kwargs):
         Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
                         'when using keras_quantization_aware_training_init. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover
     def keras_quantization_aware_training_finalize(*args, **kwargs):
         Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
                         'when using keras_quantization_aware_training_finalize. '
-                        'Could not find Tensorflow package.')
+                        'Could not find Tensorflow package.')  # pragma: no cover

model_compression_toolkit/qat/keras/quantizer/__init__.py CHANGED Viewed

@@ -12,3 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import model_compression_toolkit.qat.keras.quantizer.ste_rounding.symmetric_ste
+import model_compression_toolkit.qat.keras.quantizer.ste_rounding.uniform_ste

model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py ADDED Viewed

@@ -0,0 +1,49 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Union
+from model_compression_toolkit.core.common import Logger
+from model_compression_toolkit.core.common.constants import FOUND_TF
+from model_compression_toolkit.quantizers_infrastructure import TrainableQuantizerWeightsConfig, \
+    TrainableQuantizerActivationConfig, BaseKerasTrainableQuantizer
+if FOUND_TF:
+    class BaseKerasQATTrainableQuantizer(BaseKerasTrainableQuantizer):
+        """
+        A base class for trainable Keras quantizer for QAT.
+        """
+        def __init__(self,
+                     quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
+            """
+            Initializes BaseKerasQATTrainableQuantizer object.
+            Args:
+                quantization_config: quantizer config class contains all the information about a quantizer configuration.
+            """
+            super().__init__(quantization_config)
+else:
+    class BaseKerasQATTrainableQuantizer(BaseKerasTrainableQuantizer):
+        def __init__(self,
+                     quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
+            super().__init__(quantization_config)
+            Logger.critical('Installing tensorflow and tensorflow_model_optimization is mandatory '
+                            'when using BaseKerasQATTrainableQuantizer. '
+                            'Could not find Tensorflow package.')  # pragma: no cover

model_compression_toolkit/qat/keras/quantizer/quant_utils.py ADDED Viewed

@@ -0,0 +1,48 @@
+# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import tensorflow as tf
+from typing import Tuple
+def adjust_range_to_include_zero(range_min: tf.Tensor,
+                                 range_max: tf.Tensor,
+                                 n_bits: int) -> Tuple[tf.Tensor, tf.Tensor]:
+    """
+    Adjusting the quantization range to include representation of 0.0 in the quantization grid.
+    For per_channel quantization range_min\range_max should be tensors in the specific shape that allows
+    quantization along the channel_axis.
+    Args:
+        range_min: min bound of the quantization range (before adjustment).
+        range_max: max bound of the quantization range (before adjustment).
+        n_bits: Number of bits to quantize the tensor.
+    Returns: adjusted quantization range
+    """
+    scale = (range_max - range_min) / (2 ** n_bits - 1)
+    min_range_adj = scale * tf.round(range_min / scale)
+    max_range_adj = range_max - range_min + min_range_adj
+    min_positive = range_min > 0
+    max_negative = range_max < 0
+    mid_range = tf.logical_and(tf.logical_not(min_positive), tf.logical_not(max_negative))
+    min_positive = tf.cast(min_positive, tf.float32)
+    max_negative = tf.cast(max_negative, tf.float32)
+    mid_range = tf.cast(mid_range, tf.float32)
+    min_range_adj = min_range_adj * mid_range + max_negative * range_min
+    max_range_adj = max_range_adj * mid_range + min_positive * range_max
+    return min_range_adj, max_range_adj

model_compression_toolkit/qat/keras/quantizer/quantization_builder.py ADDED Viewed

@@ -0,0 +1,77 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Tuple, Dict, List
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.get_quantizer_config import \
+    get_trainable_quantizer_weights_config, get_trainable_quantizer_activation_config, \
+    get_trainable_quantizer_quantization_candidates
+from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_quantizer import BaseKerasQATTrainableQuantizer
+from model_compression_toolkit.qat.common.qat_config import QATConfig
+from model_compression_toolkit.quantizers_infrastructure import QuantizationTarget
+from model_compression_toolkit.quantizers_infrastructure.trainable_infrastructure.common.get_quantizers import \
+    get_trainable_quantizer_class
+def quantization_builder(n: common.BaseNode,
+                         qat_config: QATConfig,
+                         fw_info: FrameworkInfo,
+                         ) -> Tuple[Dict[str, BaseKerasQATTrainableQuantizer], List[BaseKerasQATTrainableQuantizer]]:
+    """
+    Build quantizers for a node according to its quantization configuration.
+    Args:
+        n: Node to build its QuantizeConfig.
+        qat_config (QATConfig): QAT configuration
+        fw_info: Framework information (e.g., mapping from layers to their attributes to quantize).
+    Returns:
+        weights_quantizers: A dictionary between a weight's name to its quantizer.
+        activation_quantizers: A list of activations quantization, one for each layer output.
+    """
+    if len(n.candidates_quantization_cfg) > 1:
+        wq_cand, aq_cand = get_trainable_quantizer_quantization_candidates(n)
+    else:
+        wq_cand, aq_cand = None, None
+    weight_quantizers = {}
+    if n.is_weights_quantization_enabled():
+        quant_method = n.final_weights_quantization_cfg.weights_quantization_method
+        quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Weights,
+                                                        qat_config.weight_training_method,
+                                                        quant_method,
+                                                        BaseKerasQATTrainableQuantizer)
+        attributes = fw_info.get_kernel_op_attributes(n.type)
+        for attr in attributes:
+            weight_quantizers.update({attr: quantizer_class(get_trainable_quantizer_weights_config(n, wq_cand),
+                                                            **qat_config.weight_quantizer_params_override)})
+    activation_quantizers = []
+    if n.is_activation_quantization_enabled():
+        quant_method = n.final_activation_quantization_cfg.activation_quantization_method
+        # single output -> normalize to list of output_shapes
+        output_shapes = n.output_shape if isinstance(n.output_shape[0], (list, tuple)) else [n.output_shape]
+        quantizer_class = get_trainable_quantizer_class(QuantizationTarget.Activation,
+                                                        qat_config.activation_training_method,
+                                                        quant_method,
+                                                        BaseKerasQATTrainableQuantizer)
+        activation_quantizers = [quantizer_class(get_trainable_quantizer_activation_config(n, aq_cand),
+                                                 **qat_config.activation_quantizer_params_override)] * len(output_shapes)
+    return weight_quantizers, activation_quantizers

mct-nightly 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl

mct-nightly 1.7.1.31122022.post351__py3-none-any.whl → 1.8.0.1042023.post423__py3-none-any.whl