mct-nightly 1.11.0.20240130.post401__py3-none-any.whl → 1.11.0.20240201.post434__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/METADATA +1 -1
  2. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/RECORD +44 -44
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +1 -0
  5. model_compression_toolkit/core/__init__.py +0 -1
  6. model_compression_toolkit/core/common/framework_info.py +1 -2
  7. model_compression_toolkit/core/common/graph/base_graph.py +1 -1
  8. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +4 -2
  9. model_compression_toolkit/core/common/quantization/node_quantization_config.py +11 -6
  10. model_compression_toolkit/core/common/quantization/quantization_config.py +5 -13
  11. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +1 -1
  12. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +40 -17
  13. model_compression_toolkit/core/common/substitutions/scale_equalization.py +1 -1
  14. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  15. model_compression_toolkit/core/pytorch/default_framework_info.py +2 -2
  16. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +14 -7
  17. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +4 -1
  18. model_compression_toolkit/gptq/common/gptq_config.py +1 -3
  19. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  20. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  21. model_compression_toolkit/target_platform_capabilities/constants.py +18 -1
  22. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +1 -1
  23. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +175 -33
  24. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +7 -3
  25. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +11 -1
  26. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +64 -18
  27. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +24 -8
  28. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +15 -2
  29. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +64 -20
  30. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +24 -7
  31. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +15 -2
  32. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +60 -18
  33. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +24 -7
  34. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +15 -2
  35. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +52 -11
  36. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +25 -8
  37. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +15 -2
  38. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +52 -13
  39. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +12 -1
  40. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +12 -1
  41. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/LICENSE.md +0 -0
  42. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/WHEEL +0 -0
  43. {mct_nightly-1.11.0.20240130.post401.dist-info → mct_nightly-1.11.0.20240201.post434.dist-info}/top_level.txt +0 -0
  44. /model_compression_toolkit/{core/common/defaultdict.py → defaultdict.py} +0 -0
model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import os
+from pathlib import Path
 import tempfile
 from typing import Callable
 
@@ -56,14 +57,20 @@ class FakelyQuantTFLiteExporter(FakelyQuantKerasExporter):
 
         """
         # Use Keras exporter to quantize model's weights before converting it to TFLite.
-        # Since exporter saves the model, we use a tmp path for saving, and then we delete it automatically.
-        with tempfile.NamedTemporaryFile(suffix=DEFAULT_KERAS_EXPORT_EXTENTION) as tmp_file:
-            FakelyQuantKerasExporter(self.model,
-                                     self.is_layer_exportable_fn,
-                                     tmp_file.name,
-                                     verbose=False).export()
+        # Since exporter saves the model, we use a tmp path for saving, and then we delete it.
+        handle, tmp_file = tempfile.mkstemp(DEFAULT_KERAS_EXPORT_EXTENTION)
+        # Close the handle right away; the file is going to be reopened by the Keras exporter.
+        os.close(handle)
+        try:
+            custom_objects = FakelyQuantKerasExporter(self.model,
+                                                      self.is_layer_exportable_fn,
+                                                      tmp_file,
+                                                      verbose=False).export()
 
-            model = keras_load_quantized_model(tmp_file.name)
+            model = keras_load_quantized_model(tmp_file)
+        # Ensure the artifact is removed even in case of error.
+        finally:
+            Path(tmp_file).unlink(missing_ok=True)
 
         self.exported_model = tf.lite.TFLiteConverter.from_keras_model(model).convert()
         Logger.info(f'Exporting FQ tflite model to: {self.save_model_path}')
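The move from NamedTemporaryFile to mkstemp sidesteps a platform pitfall: a file held open by NamedTemporaryFile cannot be reopened by name on Windows, which is exactly what the Keras saver needs to do. A minimal sketch of the pattern, with hypothetical save_model/load_model callables standing in for the exporter:

    import os
    import tempfile
    from pathlib import Path

    def roundtrip_via_tmp_file(save_model, load_model, suffix='.keras'):
        # Create the file, then close the OS-level handle immediately so the
        # saver can reopen the path by name.
        handle, tmp_path = tempfile.mkstemp(suffix)
        os.close(handle)
        try:
            save_model(tmp_path)         # hypothetical writer; reopens the path itself
            return load_model(tmp_path)  # hypothetical reader
        finally:
            # Remove the artifact even if saving or loading raised.
            Path(tmp_path).unlink(missing_ok=True)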
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py

@@ -26,8 +26,11 @@ from mct_quantizers import pytorch_quantizers
 # ONNX opset version 16 is supported from PyTorch 1.12
 if version.parse(torch.__version__) < version.parse("1.12"):
     OPSET_VERSION = 15
-else:
+elif version.parse("1.12.0") <= version.parse(torch.__version__) < version.parse("1.13.0"):
     OPSET_VERSION = 16
+else:
+    # ONNX opset version 17 is supported from PyTorch 1.13
+    OPSET_VERSION = 17
 
 
 class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
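Since the first branch already rules out versions below 1.12, the new elif's lower bound is redundant but harmless. For reference, the same selection restated as a standalone helper (the function name and sample assertions are illustrative):

    from packaging import version

    def select_opset(torch_version: str) -> int:
        # Opset 16 requires PyTorch >= 1.12; opset 17 requires PyTorch >= 1.13.
        v = version.parse(torch_version)
        if v < version.parse('1.12'):
            return 15
        if v < version.parse('1.13.0'):
            return 16
        return 17

    assert select_opset('1.11.0') == 15
    assert select_opset('1.12.1') == 16
    assert select_opset('2.1.0') == 17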
model_compression_toolkit/gptq/common/gptq_config.py

@@ -14,9 +14,7 @@
 # ==============================================================================
 from enum import Enum
 from typing import Callable, Any, Dict
-from model_compression_toolkit.core.common.defaultdict import DefaultDict
-from model_compression_toolkit.core import common
-from model_compression_toolkit.gptq.common.gptq_constants import QUANT_PARAM_LEARNING_STR, MAX_LSB_STR, REG_DEFAULT
+from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
 
 
 class RoundingType(Enum):
model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py

@@ -24,7 +24,7 @@ from mct_quantizers import QuantizationTarget
 from model_compression_toolkit.gptq.common.gptq_constants import AUXVAR, PTQ_THRESHOLD
 from model_compression_toolkit.gptq.keras.quantizer import quant_utils as qutils
 from model_compression_toolkit.constants import THRESHOLD
-from model_compression_toolkit.core.common.defaultdict import DefaultDict
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
 from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
 from mct_quantizers import mark_quantizer
model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py

@@ -16,7 +16,7 @@ import torch
 import torch.nn as nn
 from typing import Dict
 import numpy as np
-from model_compression_toolkit.core.common.defaultdict import DefaultDict
+from model_compression_toolkit.defaultdict import DefaultDict
 
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
 from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
model_compression_toolkit/target_platform_capabilities/constants.py

@@ -24,4 +24,21 @@ LATEST = 'latest'
 DEFAULT_TP_MODEL = 'default'
 IMX500_TP_MODEL = 'imx500'
 TFLITE_TP_MODEL = 'tflite'
-QNNPACK_TP_MODEL = 'qnnpack'
+QNNPACK_TP_MODEL = 'qnnpack'
+
+# TP Attributes
+KERNEL_ATTR = "kernel_attr"
+BIAS_ATTR = "bias_attr"
+
+# TODO: these are duplicated from the core frameworks' constants files, because the original constants can't be
+# used here due to a circular dependency. It might be best to extract the constants from the core file and put
+# them here (in a separate changeset, because it affects the entire code).
+KERAS_KERNEL = "kernel"
+KERAS_DEPTHWISE_KERNEL = "depthwise_kernel"
+BIAS = "bias"
+PYTORCH_KERNEL = "weight"
+
+# Configuration attributes names
+WEIGHTS_N_BITS = 'weights_n_bits'
+WEIGHTS_QUANTIZATION_METHOD = 'weights_quantization_method'
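These constants are the keys used throughout the rest of this diff: KERNEL_ATTR and BIAS_ATTR key the per-attribute quantization configs in a TP model, WEIGHTS_N_BITS names the parameter edited per attribute, and the framework-specific names (KERAS_KERNEL, PYTORCH_KERNEL, ...) are what the generic keys are later mapped to per layer type. A tiny illustration of the indirection (edit_request is a hypothetical name):

    from model_compression_toolkit.target_platform_capabilities.constants import (
        KERNEL_ATTR, WEIGHTS_N_BITS, KERAS_KERNEL, PYTORCH_KERNEL)

    # 'Quantize the kernel attribute with 4 bits', stated framework-agnostically:
    edit_request = {KERNEL_ATTR: {WEIGHTS_N_BITS: 4}}

    # Per framework, the generic key is later resolved to the real attribute name:
    assert KERAS_KERNEL == 'kernel' and PYTORCH_KERNEL == 'weight'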
model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py

@@ -21,7 +21,7 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.targ
     get_default_quantization_config_options, TargetPlatformModel
 
 from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \
-    QuantizationConfigOptions
+    QuantizationConfigOptions, AttributeQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSet, OperatorSetConcat
 
 from mct_quantizers import QuantizationMethod
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py

@@ -14,58 +14,135 @@
 # ==============================================================================
 
 import copy
-from typing import List
+from typing import List, Dict, Union, Any
 
 from mct_quantizers import QuantizationMethod
+from model_compression_toolkit.logger import Logger
 
 
-class OpQuantizationConfig:
+def clone_and_edit_object_params(obj: Any, **kwargs: Dict) -> Any:
     """
-    OpQuantizationConfig is a class to configure the quantization parameters of an operator.
+    Clones the given object and edits some of its parameters.
+
+    Args:
+        obj: An object to clone.
+        **kwargs: Keyword arguments to edit in the cloned object.
+
+    Returns:
+        Edited copy of the given object.
     """
 
+    obj_copy = copy.deepcopy(obj)
+    for k, v in kwargs.items():
+        assert hasattr(obj_copy, k), f'Editing parameters is possible only for existing parameters in the given ' \
+                                     f'object, but {k} is not a parameter of {obj_copy}.'
+        setattr(obj_copy, k, v)
+    return obj_copy
+
+
+class AttributeQuantizationConfig:
+    """
+    Holds the quantization configuration of a weight attribute of a layer.
+    """
     def __init__(self,
-                 activation_quantization_method: QuantizationMethod,
                  weights_quantization_method: QuantizationMethod,
-                 activation_n_bits: int,
                  weights_n_bits: int,
                  weights_per_channel_threshold: bool,
                  enable_weights_quantization: bool,
+                 lut_values_bitwidth: Union[int, None],  # If None - set 8 in hptq, o.w use it
+                 ):
+        """
+        Initializes an attribute quantization config.
+
+        Args:
+            weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization.
+            weights_n_bits (int): Number of bits to quantize the coefficients.
+            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
+            enable_weights_quantization (bool): Whether to quantize the model weights or not.
+            lut_values_bitwidth (int): Number of bits to use when quantizing in a look-up table.
+        """
+
+        self.weights_quantization_method = weights_quantization_method
+        self.weights_n_bits = weights_n_bits
+        self.weights_per_channel_threshold = weights_per_channel_threshold
+        self.enable_weights_quantization = enable_weights_quantization
+        self.lut_values_bitwidth = lut_values_bitwidth
+
+    def clone_and_edit(self, **kwargs):
+        """
+        Clones the quantization config and edits some of its attributes.
+
+        Args:
+            **kwargs: Keyword arguments to edit in the cloned configuration.
+
+        Returns:
+            Edited quantization configuration.
+        """
+
+        return clone_and_edit_object_params(self, **kwargs)
+
+    def __eq__(self, other):
+        """
+        Checks whether this configuration is equal to another object.
+
+        Args:
+            other: Object to compare.
+
+        Returns:
+            Whether this configuration is equal to another object or not.
+        """
+        if not isinstance(other, AttributeQuantizationConfig):
+            return False
+        return self.weights_quantization_method == other.weights_quantization_method and \
+               self.weights_n_bits == other.weights_n_bits and \
+               self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
+               self.enable_weights_quantization == other.enable_weights_quantization and \
+               self.lut_values_bitwidth == other.lut_values_bitwidth
+
+
+class OpQuantizationConfig:
+    """
+    OpQuantizationConfig is a class to configure the quantization parameters of an operator.
+    """
+
+    def __init__(self,
+                 default_weight_attr_config: AttributeQuantizationConfig,
+                 attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig],
+                 activation_quantization_method: QuantizationMethod,
+                 activation_n_bits: int,
                  enable_activation_quantization: bool,
                  quantization_preserving: bool,
                  fixed_scale: float,
                  fixed_zero_point: int,
-                 weights_multiplier_nbits: int,  # If None - set 8 in hptq, o.w use it
-                 simd_size: int):
+                 simd_size: int
+                 ):
         """
 
         Args:
+            default_weight_attr_config (AttributeQuantizationConfig): A default attribute quantization configuration for the operation.
+            attr_weights_configs_mapping (dict): A mapping between an op attribute name and its quantization configuration.
             activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization.
-            weights_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for weights quantization.
             activation_n_bits (int): Number of bits to quantize the activations.
-            weights_n_bits (int): Number of bits to quantize the coefficients.
-            weights_per_channel_threshold (bool): Whether to quantize the weights per-channel or not (per-tensor).
-            enable_weights_quantization (bool): Whether to quantize the model weights or not.
             enable_activation_quantization (bool): Whether to quantize the model activations or not.
             quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output.
             fixed_scale (float): Scale to use for an operator quantization parameters.
             fixed_zero_point (int): Zero-point to use for an operator quantization parameters.
-            weights_multiplier_nbits (int): Number of bits to use when quantizing in look-up-table.
-            simd_size (int): An integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
+            simd_size (int): Per-op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
 
         """
 
+        self.default_weight_attr_config = default_weight_attr_config
+        self.attr_weights_configs_mapping = attr_weights_configs_mapping
+
         self.activation_quantization_method = activation_quantization_method
-        self.weights_quantization_method = weights_quantization_method
         self.activation_n_bits = activation_n_bits
-        self.weights_n_bits = weights_n_bits
-        self.weights_per_channel_threshold = weights_per_channel_threshold
-        self.enable_weights_quantization = enable_weights_quantization
         self.enable_activation_quantization = enable_activation_quantization
         self.quantization_preserving = quantization_preserving
         self.fixed_scale = fixed_scale
         self.fixed_zero_point = fixed_zero_point
-        self.eights_lut_values_bitwidth = weights_multiplier_nbits
         self.simd_size = simd_size
 
     def get_info(self):
@@ -76,22 +153,28 @@ class OpQuantizationConfig:
         """
         return self.__dict__
 
-    def clone_and_edit(self, **kwargs):
+    def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs):
         """
         Clone the quantization config and edit some of its attributes.
         Args:
+            attr_to_edit: A mapping between the names of attributes to edit and the parameters that
+                should be edited to a new value.
             **kwargs: Keyword arguments to edit the configuration to clone.
 
         Returns:
             Edited quantization configuration.
         """
 
-        qc = copy.deepcopy(self)
-        for k, v in kwargs.items():
-            assert hasattr(qc,
-                           k), f'Edit attributes is possible only for existing attributes in configuration, ' \
-                               f'but {k} is not an attribute of {qc}'
-            setattr(qc, k, v)
+        qc = clone_and_edit_object_params(self, **kwargs)
+
+        # Optionally, edit specific parameters in the configs of the specified attributes.
+        edited_attrs = copy.deepcopy(qc.attr_weights_configs_mapping)
+        for attr_name, attr_cfg in qc.attr_weights_configs_mapping.items():
+            if attr_name in attr_to_edit:
+                edited_attrs[attr_name] = attr_cfg.clone_and_edit(**attr_to_edit[attr_name])
+
+        qc.attr_weights_configs_mapping = edited_attrs
+
         return qc
 
     def __eq__(self, other):
@@ -105,14 +188,12 @@ class OpQuantizationConfig:
         """
         if not isinstance(other, OpQuantizationConfig):
             return False
-        return self.activation_quantization_method == other.activation_quantization_method and \
-               self.weights_quantization_method == other.weights_quantization_method and \
-               self.activation_n_bits == other.activation_n_bits and \
-               self.weights_n_bits == other.weights_n_bits and \
-               self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
-               self.enable_weights_quantization == other.enable_weights_quantization and \
-               self.enable_activation_quantization == other.enable_activation_quantization and \
-               self.simd_size==other.simd_size
+        return self.default_weight_attr_config == other.default_weight_attr_config and \
+               self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \
+               self.activation_quantization_method == other.activation_quantization_method and \
+               self.activation_n_bits == other.activation_n_bits and \
+               self.enable_activation_quantization == other.enable_activation_quantization and \
+               self.simd_size == other.simd_size
 
 
 class QuantizationConfigOptions(object):
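Taken together, the refactor splits an operator's configuration into per-attribute weight configs plus op-level activation settings. A minimal usage sketch of the new API (the values are illustrative, not a recommended configuration):

    from mct_quantizers import QuantizationMethod
    from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR
    from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import (
        AttributeQuantizationConfig, OpQuantizationConfig)

    kernel_cfg = AttributeQuantizationConfig(
        weights_quantization_method=QuantizationMethod.SYMMETRIC,
        weights_n_bits=8,
        weights_per_channel_threshold=True,
        enable_weights_quantization=True,
        lut_values_bitwidth=None)

    op_cfg = OpQuantizationConfig(
        default_weight_attr_config=kernel_cfg.clone_and_edit(enable_weights_quantization=False),
        attr_weights_configs_mapping={KERNEL_ATTR: kernel_cfg},
        activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
        activation_n_bits=8,
        enable_activation_quantization=True,
        quantization_preserving=False,
        fixed_scale=None,
        fixed_zero_point=None,
        simd_size=32)

    # Clone the op config, changing only the kernel's bit-width:
    four_bit_op = op_cfg.clone_and_edit(attr_to_edit={KERNEL_ATTR: {'weights_n_bits': 4}})
    assert four_bit_op.attr_weights_configs_mapping[KERNEL_ATTR].weights_n_bits == 4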
@@ -177,6 +258,67 @@ class QuantizationConfigOptions(object):
             self.__edit_quantization_configuration(qc, kwargs)
         return qc_options
 
+    def clone_and_edit_weight_attribute(self, attrs: List[str] = None, **kwargs):
+        """
+        Clones the quantization configurations and edits some of their attributes' parameters.
+
+        Args:
+            attrs: Names of the attributes whose configurations should be edited. If None is provided,
+                the configurations of all attributes in the operation's attributes config mapping are updated.
+            **kwargs: Keyword arguments to edit in the attributes configuration.
+
+        Returns:
+            QuantizationConfigOptions with edited attributes configurations.
+
+        """
+
+        qc_options = copy.deepcopy(self)
+
+        for qc in qc_options.quantization_config_list:
+            if attrs is None:
+                attrs_to_update = list(qc.attr_weights_configs_mapping.keys())
+            else:
+                if not isinstance(attrs, List):
+                    Logger.error(f"Expecting a list of attributes but got {type(attrs)}.")
+                attrs_to_update = attrs
+
+            for attr in attrs_to_update:
+                if qc.attr_weights_configs_mapping.get(attr) is None:
+                    Logger.error(f'Editing attributes is possible only for existing attributes '
+                                 f'in the configuration weights config mapping, but {attr} is not an attribute of {qc}.')
+                self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs)
+        return qc_options
+
+    def clone_and_map_weights_attr_keys(self, layer_attrs_mapping: Union[Dict[str, str], None]):
+        """
+        Clones the quantization configuration options and edits the keys in each configuration's attributes
+        config mapping, based on the given attribute names mapping.
+
+        Args:
+            layer_attrs_mapping: A mapping between attribute names.
+
+        Returns:
+            QuantizationConfigOptions with edited attribute names.
+
+        """
+        qc_options = copy.deepcopy(self)
+
+        for qc in qc_options.quantization_config_list:
+            if layer_attrs_mapping is None:
+                qc.attr_weights_configs_mapping = {}
+            else:
+                new_attr_mapping = {}
+                for attr in list(qc.attr_weights_configs_mapping.keys()):
+                    new_key = layer_attrs_mapping.get(attr)
+                    if new_key is None:
+                        Logger.error(f"Attribute {attr} does not exist in the given attribute mapping.")
+
+                    new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr)
+
+                qc.attr_weights_configs_mapping.update(new_attr_mapping)
+
+        return qc_options
+
     def __edit_quantization_configuration(self, qc, kwargs):
         for k, v in kwargs.items():
             assert hasattr(qc,
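A sketch of how the two new helpers compose, reusing op_cfg from the sketch above: clone_and_edit_weight_attribute edits a parameter across attribute configs, while clone_and_map_weights_attr_keys renames the generic keys to a layer's framework-specific names (here the Keras names added to constants.py in this diff):

    from model_compression_toolkit.target_platform_capabilities.constants import (
        KERNEL_ATTR, BIAS_ATTR, KERAS_KERNEL, BIAS)
    from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import (
        QuantizationConfigOptions)

    qc_options = QuantizationConfigOptions([op_cfg])

    # Disable weight quantization for every attribute of every config in the options:
    no_weight_quant = qc_options.clone_and_edit_weight_attribute(enable_weights_quantization=False)

    # Rename the generic keys to the framework-specific names of, e.g., a Keras Dense layer:
    keras_opts = qc_options.clone_and_map_weights_attr_keys({KERNEL_ATTR: KERAS_KERNEL, BIAS_ATTR: BIAS})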
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import List, Any
+from typing import List, Any, Dict
 
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.current_tpc import _current_tpc
@@ -22,21 +22,25 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.oper
     OperatorsSetBase
 
 
-
 class OperationsSetToLayers(TargetPlatformCapabilitiesComponent):
     """
     Associate an OperatorsSet to a list of framework's layers.
     """
     def __init__(self,
                  op_set_name: str,
-                 layers: List[Any]):
+                 layers: List[Any],
+                 attr_mapping: Dict[str, Any] = None):
         """
 
         Args:
             op_set_name (str): Name of OperatorsSet to associate with layers.
             layers (List[Any]): List of layers/FilterLayerParams to associate with OperatorsSet.
+            attr_mapping (dict): A mapping between a general attribute name and a DefaultDict that maps a layer
+                type to the framework-specific name of that attribute (the dictionary type is left unspecified to
+                avoid a circular dependency).
         """
         self.layers = layers
+        self.attr_mapping = attr_mapping
         super(OperationsSetToLayers, self).__init__(name=op_set_name)
         _current_tpc.get().remove_opset_from_not_used_list(op_set_name)
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py

@@ -188,6 +188,16 @@ class TargetPlatformCapabilities(ImmutableClass):
             qco = self.tp_model.get_config_options_by_operators_set(op2layers.name)
             if qco is None:
                 qco = self.tp_model.default_qco
+
+            # Here we need to map a general attribute name to a framework- and layer-type-specific
+            # attribute name.
+            # attr_mapping maps an attribute's generic name to a dictionary that maps each
+            # layer type to its framework-specific attribute name.
+            # In the loop below, v is that inner dictionary.
+            layer_attrs_mapping = None if op2layers.attr_mapping is None else \
+                {k: v.get(l) for k, v in op2layers.attr_mapping.items()}
+            qco = qco.clone_and_map_weights_attr_keys(layer_attrs_mapping)
+
             if isinstance(l, LayerFilterParams):
                 filterlayer2qco.update({l: qco})
             else:
@@ -231,4 +241,4 @@ class TargetPlatformCapabilities(ImmutableClass):
         Returns: Check if the TP model defines that padding due to SIMD constrains occurs.
 
         """
-        return self.tp_model.is_simd_padding
+        return self.tp_model.is_simd_padding
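The dictionary comprehension above is what connects OperationsSetToLayers.attr_mapping to clone_and_map_weights_attr_keys. A toy illustration of the resolution step, with plain dicts and string layer names standing in for MCT's DefaultDict and real layer classes:

    # attr_mapping: generic attribute name -> (layer type -> framework attribute name).
    attr_mapping = {
        'kernel_attr': {'DepthwiseConv2D': 'depthwise_kernel', 'Conv2D': 'kernel'},
        'bias_attr': {'DepthwiseConv2D': 'bias', 'Conv2D': 'bias'},
    }

    l = 'Conv2D'  # the layer type currently being attached to the operators set
    layer_attrs_mapping = {k: v.get(l) for k, v in attr_mapping.items()}
    assert layer_attrs_mapping == {'kernel_attr': 'kernel', 'bias_attr': 'bias'}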
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py

@@ -15,8 +15,12 @@
 from typing import List, Tuple
 
 import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
 from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
     TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
 
 tp = mct.target_platform
@@ -32,14 +36,14 @@ def get_tp_model() -> TargetPlatformModel:
     Returns: A TargetPlatformModel object.
 
     """
-    base_config, mixed_precision_cfg_list = get_op_quantization_configs()
-    return generate_tp_model(default_config=base_config,
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
                              base_config=base_config,
                              mixed_precision_cfg_list=mixed_precision_cfg_list,
                              name='imx500_tp_model')
 
 
-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -48,21 +52,63 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
 
     """
+
+    # TODO: currently, we don't want to quantize any attribute but the kernel by default,
+    # to preserve the current behavior of MCT, so quantization is disabled for all other attributes.
+    # Other quantization parameters are set to what we eventually want to quantize by default
+    # when we enable multi-attribute quantization - THIS NEEDS TO BE MODIFIED IN ALL TP MODELS!
+
+    # Define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,  # TODO: this will be changed to True once multi-attribute quantization is implemented
+        lut_values_bitwidth=None)
+
+    # Define a quantization config to quantize the kernel (for layers that have a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # Define a quantization config to quantize the bias (for layers that have a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
     # Create a quantization config.
     # A quantization configuration defines how an operator
     # should be quantized on the modeled hardware:
-    eight_bits = tp.OpQuantizationConfig(
+
+    # We define a default config for operations without a kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # We define an 8-bit config for quantizing linear operations, which include kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
-        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
         activation_n_bits=8,
-        weights_n_bits=8,
-        weights_per_channel_threshold=True,
-        enable_weights_quantization=True,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
         fixed_zero_point=None,
-        weights_multiplier_nbits=None,
         simd_size=32)
 
     # To quantize a model using mixed-precision, create
@@ -70,14 +116,14 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
     # In this example, we quantize some operations' weights
     # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
     # to quantize the operations' activations using LUT.
-    four_bits = eight_bits.clone_and_edit(weights_n_bits=4,
-                                          simd_size=eight_bits.simd_size*2)
-    two_bits = eight_bits.clone_and_edit(weights_n_bits=2,
-                                         simd_size=eight_bits.simd_size*4)
+    four_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}},
+                                                 simd_size=linear_eight_bits.simd_size * 2)
+    two_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}},
+                                                simd_size=linear_eight_bits.simd_size * 4)
 
-    mixed_precision_cfg_list = [eight_bits, four_bits, two_bits]
+    mixed_precision_cfg_list = [linear_eight_bits, four_bits, two_bits]
 
-    return eight_bits, mixed_precision_cfg_list
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
 
 
 def generate_tp_model(default_config: OpQuantizationConfig,
@@ -121,10 +167,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     generated_tpc.set_simd_padding(is_simd_padding=True)
 
     # May suit for operations like: Dropout, Reshape, etc.
+    default_qco = tp.get_default_quantization_config_options()
     tp.OperatorsSet("NoQuantization",
-                    tp.get_default_quantization_config_options().clone_and_edit(
-                        enable_weights_quantization=False,
-                        enable_activation_quantization=False))
+                    default_qco.clone_and_edit(enable_activation_quantization=False)
+                    .clone_and_edit_weight_attribute(enable_weights_quantization=False))
 
     # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
     mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py

@@ -14,7 +14,11 @@
 # ==============================================================================
 import tensorflow as tf
 from packaging import version
+
+from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.constants import FOUND_SONY_CUSTOM_LAYERS
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
+    KERAS_KERNEL, BIAS_ATTR, BIAS
 
 if FOUND_SONY_CUSTOM_LAYERS:
     from sony_custom_layers.keras.object_detection.ssd_post_process import SSDPostProcess
@@ -85,14 +89,26 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
 
     with keras_tpc:
         tp.OperationsSetToLayers("NoQuantization", no_quant_list)
-
-        tp.OperationsSetToLayers("Conv", [Conv2D,
-                                          DepthwiseConv2D,
-                                          Conv2DTranspose,
-                                          tf.nn.conv2d,
-                                          tf.nn.depthwise_conv2d,
-                                          tf.nn.conv2d_transpose])
-        tp.OperationsSetToLayers("FullyConnected", [Dense])
+        tp.OperationsSetToLayers("Conv",
+                                 [Conv2D,
+                                  DepthwiseConv2D,
+                                  Conv2DTranspose,
+                                  tf.nn.conv2d,
+                                  tf.nn.depthwise_conv2d,
+                                  tf.nn.conv2d_transpose],
+                                 # We provide an attributes mapping that maps each layer type in the operations set
+                                 # that has weights attributes with a provided quantization config (in the TP model)
+                                 # to its framework-specific attribute name.
+                                 # Note that a DefaultDict should be provided if not all the layer types in the
+                                 # operations set are listed separately in the mapping.
+                                 attr_mapping={
+                                     KERNEL_ATTR: DefaultDict({
+                                         DepthwiseConv2D: KERAS_DEPTHWISE_KERNEL,
+                                         tf.nn.depthwise_conv2d: KERAS_DEPTHWISE_KERNEL}, default_value=KERAS_KERNEL),
+                                     BIAS_ATTR: DefaultDict(default_value=BIAS)})
+        tp.OperationsSetToLayers("FullyConnected", [Dense],
+                                 attr_mapping={KERNEL_ATTR: DefaultDict(default_value=KERAS_KERNEL),
+                                               BIAS_ATTR: DefaultDict(default_value=BIAS)})
         tp.OperationsSetToLayers("AnyReLU", [tf.nn.relu,
                                              tf.nn.relu6,
                                              tf.nn.leaky_relu,