mct-nightly 2.0.0.20240417.406__py3-none-any.whl → 2.0.0.20240419.358__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/METADATA +1 -1
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/RECORD +60 -57
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +2 -0
- model_compression_toolkit/core/common/graph/base_graph.py +2 -2
- model_compression_toolkit/core/common/graph/base_node.py +26 -9
- model_compression_toolkit/core/common/graph/functional_node.py +18 -1
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
- model_compression_toolkit/core/common/hessian/trace_hessian_request.py +1 -3
- model_compression_toolkit/core/common/network_editors/node_filters.py +4 -3
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +0 -5
- model_compression_toolkit/core/common/quantization/quantization_config.py +5 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +67 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +12 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +14 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +30 -3
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +17 -7
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +14 -3
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +13 -3
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +16 -3
- model_compression_toolkit/core/common/similarity_analyzer.py +16 -4
- model_compression_toolkit/core/common/substitutions/remove_identity.py +48 -0
- model_compression_toolkit/core/graph_prep_runner.py +10 -4
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +4 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -7
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py +51 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/keras/keras_implementation.py +13 -11
- model_compression_toolkit/core/keras/keras_node_prior_info.py +4 -4
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +4 -5
- model_compression_toolkit/core/keras/reader/common.py +2 -2
- model_compression_toolkit/core/keras/reader/node_builder.py +28 -9
- model_compression_toolkit/core/keras/tf_tensor_numpy.py +5 -2
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +34 -21
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_folding.py +8 -8
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +4 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py +50 -0
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +8 -8
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +7 -6
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +2 -2
- model_compression_toolkit/core/quantization_prep_runner.py +6 -2
- model_compression_toolkit/core/runner.py +5 -2
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +5 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +9 -2
- model_compression_toolkit/gptq/keras/quantization_facade.py +2 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +3 -1
- model_compression_toolkit/gptq/runner.py +1 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +5 -5
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +1 -1
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +20 -6
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +1 -1
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +22 -8
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +1 -1
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/WHEEL +0 -0
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py
ADDED

@@ -0,0 +1,50 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import torch
+
+from model_compression_toolkit.core.common.substitutions.remove_identity import remove_identity_node
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+
+
+class RemoveIdentity(common.BaseSubstitution):
+    """
+    Remove `torch.nn.Identity` layers from the graph.
+    """
+
+    def __init__(self):
+        nodes = NodeOperationMatcher(torch.nn.Identity)
+        super().__init__(matcher_instance=nodes)
+
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        The method to perform the substitution of the `torch.nn.Identity` node by
+        reconnecting its input directly to its output, effectively removing the node
+        from the graph.
+
+        Args:
+            graph: The current graph of operations where the node resides.
+            node: The specific `BaseNode` that is matched to be an Identity operation.
+
+        Returns:
+            Graph: The updated graph after removing the identity node.
+        """
+        return remove_identity_node(graph, node)
+
+
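Note: remove_identity_node itself lives in the new common substitution module (model_compression_toolkit/core/common/substitutions/remove_identity.py, +48 in the file list above) and is not shown in this diff. As an illustration of the same rewiring idea (not MCT's implementation), here is a minimal torch.fx sketch that drops nn.Identity nodes by reconnecting each one's consumers to its input:

import torch
import torch.fx as fx

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.identity = torch.nn.Identity()
        self.linear = torch.nn.Linear(4, 2)

    def forward(self, x):
        return self.linear(self.identity(x))

m = Toy()
gm = fx.symbolic_trace(m)
for node in list(gm.graph.nodes):
    # An Identity module is a pure pass-through: rewire its users to its
    # single input, then erase the node from the graph.
    if node.op == 'call_module' and isinstance(gm.get_submodule(node.target), torch.nn.Identity):
        node.replace_all_uses_with(node.args[0])
        gm.graph.erase_node(node)
gm.recompile()

x = torch.randn(1, 4)
assert torch.allclose(gm(x), m(x))  # same outputs, identity node gone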
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py
CHANGED

@@ -46,7 +46,7 @@ def residual_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel
     """
-    if first_node.type == Conv2d:
+    if first_node.is_match_type(Conv2d):
         # Get nodes attributes
         kernel = first_node.get_weights_by_keys(kernel_str)
         (Cout, Cin, kH, kW) = kernel.shape
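Note: for context on what residual_collapsing_fn folds, an identity shortcut y = conv(x) + x can be absorbed into the convolution by adding 1 at the kernel center along the channel diagonal. A self-contained demonstration of that identity (illustrative only; MCT performs the equivalent edit on the node's kernel weights):

import torch

conv = torch.nn.Conv2d(4, 4, kernel_size=3, padding=1, bias=False)
x = torch.randn(1, 4, 8, 8)
y_ref = conv(x) + x                      # residual block with identity shortcut

folded = torch.nn.Conv2d(4, 4, kernel_size=3, padding=1, bias=False)
with torch.no_grad():
    k = conv.weight.clone()              # (Cout, Cin, kH, kW)
    for c in range(4):
        k[c, c, 1, 1] += 1.0             # +1 at the kernel center on the channel diagonal
    folded.weight.copy_(k)

assert torch.allclose(folded(x), y_ref, atol=1e-5)  # single conv, same output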
model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py
CHANGED

@@ -76,9 +76,9 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplementation):
         pruned_parameters = {}
         mask_bool = output_mask.astype(bool)
         node.weights = pruned_parameters
-        if node.type == torch.nn.BatchNorm2d:
+        if node.is_match_type(torch.nn.BatchNorm2d):
             node.framework_attr[NUM_FEATURES] = int(np.sum(input_mask))
-        elif node.type == torch.nn.PReLU:
+        elif node.is_match_type(torch.nn.PReLU):
             if node.framework_attr[NUM_PARAMETERS] > 1:
                 node.framework_attr[NUM_PARAMETERS] = int(np.sum(input_mask))
         else:

@@ -227,9 +227,9 @@ def _is_pytorch_node_pruning_section_edge(node: BaseNode) -> bool:
     """

     # Check if the node is a Conv2D or Conv2DTranspose layer with groups set to 1.
-    if node.type == torch.nn.Conv2d or node.type == torch.nn.ConvTranspose2d:
+    if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
         return node.framework_attr[GROUPS] == 1
-    return node.type == torch.nn.Linear
+    return node.is_match_type(torch.nn.Linear)


 def _prune_pytorch_edge_node(node: BaseNode,

@@ -268,18 +268,18 @@ def _prune_pytorch_edge_node(node: BaseNode,
     if not is_exit_node:
         # Update 'out_channels' or 'out_features' attributes for entry nodes
         # Conv2d,ConvTranspose2d / Linear layers.
-        if node.type == torch.nn.Conv2d or node.type == torch.nn.ConvTranspose2d:
+        if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
             node.framework_attr[OUT_CHANNELS] = int(np.sum(mask))
-        elif node.type == torch.nn.Linear:
+        elif node.is_match_type(torch.nn.Linear):
             node.framework_attr[OUT_FEATURES] = int(np.sum(mask))
         else:
             Logger.critical(f"{node.type} is currently not supported"
                             f"as an edge node in a pruning section")

     if is_exit_node:
-        if node.type == torch.nn.Conv2d or node.type == torch.nn.ConvTranspose2d:
+        if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
             node.framework_attr[IN_CHANNELS] = int(np.sum(mask))
-        elif node.type == torch.nn.Linear:
+        elif node.is_match_type(torch.nn.Linear):
             node.framework_attr[IN_FEATURES] = int(np.sum(mask))
         else:
             Logger.critical(f"{node.type} is currently not supported"
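Note: these hunks are part of a release-wide move from direct `node.type == X` comparisons to `node.is_match_type(X)` (the helper itself is introduced in base_node.py +26 -9 and functional_node.py +18 -1 in the file list, neither shown here). A hypothetical sketch of the idea, with illustrative names only:

class BaseNode:
    def __init__(self, node_type):
        self.type = node_type

    def is_match_type(self, _type) -> bool:
        # Plain layer nodes match on the node type itself.
        return _type == self.type


class FunctionalNode(BaseNode):
    def __init__(self, node_type, functional_op):
        super().__init__(node_type)
        self.functional_op = functional_op

    def is_match_type(self, _type) -> bool:
        # A functional node may also match on the op it wraps, so callers
        # no longer need to know how the op was traced.
        return super().is_match_type(_type) or _type == self.functional_op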
model_compression_toolkit/core/pytorch/pytorch_implementation.py
CHANGED

@@ -58,6 +58,7 @@ from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.const_holder_conv import \
     FunctionalConvSubstitution
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.relu_bound_to_power_of_2 import \
     ReLUBoundToPowerOfTwo
+from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.reshape_with_static_shapes import \
     ReshapeWithStaticShapes
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.residual_collapsing import \

@@ -238,7 +239,8 @@ class PytorchImplementation(FrameworkImplementation):
                 PermuteCallMethod(),
                 FunctionalConvSubstitution(fw_info),
                 FunctionalBatchNorm(),
-                FunctionalLayerNorm()]
+                FunctionalLayerNorm(),
+                RemoveIdentity()]

     def get_substitutions_pre_statistics_collection(self,
                                                     quant_config: QuantizationConfig

@@ -396,8 +398,8 @@ class PytorchImplementation(FrameworkImplementation):
         Returns: True if the node should be considered an interest point, False otherwise.
         """

-        if node.type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
-                         softmax, operator.add, add, cat, operator.concat]:
+        if any([node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
+                                                        softmax, operator.add, add, cat, operator.concat]]):
             return True
         return False

@@ -462,12 +464,12 @@ class PytorchImplementation(FrameworkImplementation):
         kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
         output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)

-        if node.type == Conv2d or node.type == ConvTranspose2d:
+        if node.is_match_type(Conv2d) or node.is_match_type(ConvTranspose2d):
             # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
             return np.prod([x for x in output_shape if x is not None]) * \
                    kernel_shape[input_channel_axis] * \
                    (kernel_shape[0] * kernel_shape[1])
-        elif node.type == Linear:
+        elif node.is_match_type(Linear):
             # IN * OUT
             return kernel_shape[0] * kernel_shape[1]
         else:

@@ -550,7 +552,6 @@ class PytorchImplementation(FrameworkImplementation):
         Returns:
             weight_quantizers: A dictionary between a weight's name to its quantizer.
             activation_quantizers: A list of activations quantization, one for each layer output.
-
         """

         return get_inferable_quantizers(node,
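Note: a quick worked example of the Conv2d MAC count used in the get_node_mac_operations hunk above, (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel), with assumed toy shapes:

# Assumed for illustration: Conv2d mapping 3 -> 16 channels with a 3x3 kernel
# and a 32x32 output feature map.
c_out, c_in, k_h, k_w = 16, 3, 3, 3
h_out = w_out = 32
macs = (c_out * h_out * w_out) * c_in * (k_h * k_w)
print(macs)  # 442368 multiply-accumulate operations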
model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py
CHANGED

@@ -62,7 +62,7 @@ def _get_mean_std_outputs(node: BaseNode,
     """
     mean_output, std_output = None, None

-    if node.type == BatchNorm2d:
+    if node.is_match_type(BatchNorm2d):
         mean_output = node.get_weights_by_keys(BETA)
         if node.get_weights_by_keys(GAMMA) is None:
             std_output = 1.0

@@ -72,7 +72,7 @@ def _get_mean_std_outputs(node: BaseNode,
         mean_output = 0.0
     else:
         next_node_list = graph.get_next_nodes(node)
-        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.type == BatchNorm2d]
+        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.is_match_type(BatchNorm2d)]
         if len(bn_nodes) != 0:
             bn_node = bn_nodes[0]
             moving_variance = bn_node.get_weights_by_keys(MOVING_VARIANCE)
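Note: the prior-info logic above rests on a standard BatchNorm fact: BN outputs gamma * x_hat + beta with x_hat normalized, so the output mean is beta and the output standard deviation is |gamma| (hence std_output = 1.0 when gamma is absent). A quick numpy check:

import numpy as np

x = np.random.randn(100_000) * 3.0 + 7.0      # arbitrary input statistics
gamma, beta = 1.5, -0.25
x_hat = (x - x.mean()) / x.std()              # what BN normalization produces
y = gamma * x_hat + beta
print(round(y.mean(), 3), round(y.std(), 3))  # -0.25 1.5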
model_compression_toolkit/core/quantization_prep_runner.py
CHANGED

@@ -21,6 +21,7 @@ from tqdm import tqdm
 from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.model_collector import ModelCollector
 from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig

@@ -38,7 +39,8 @@ def quantization_preparation_runner(graph: Graph,
                                     core_config: CoreConfig,
                                     fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation,
-                                    tb_w: TensorboardWriter = None) -> Graph:
+                                    tb_w: TensorboardWriter = None,
+                                    hessian_info_service: HessianInfoService = None,) -> Graph:
     """
     Prepares a trained model for post-training quantization.
     First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).

@@ -55,6 +57,7 @@ def quantization_preparation_runner(graph: Graph,
         groups of layers by how they should be quantized, etc.).
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w: TensorboardWriter object for logging
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.

     Returns:
         Graph object that represents the model, contains thresholds, and ready for quantization.

@@ -86,7 +89,8 @@ def quantization_preparation_runner(graph: Graph,
     ######################################
     # Calculate quantization params
     ######################################
-    calculate_quantization_params(graph)
+
+    calculate_quantization_params(graph, hessian_info_service=hessian_info_service)

     if tb_w is not None:
         tb_w.add_graph(graph, 'thresholds_selection')
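Note: the point of threading one HessianInfoService instance from core_runner (next file) through quantization_preparation_runner into calculate_quantization_params is reuse: Hessian-based scores are expensive to approximate, and a shared service can cache them across threshold selection and later GPTQ. A minimal caching sketch (not MCT's API; names are illustrative):

class TinyHessianService:
    """Caches expensive per-request Hessian score approximations."""

    def __init__(self, compute_fn):
        self._compute = compute_fn    # e.g. a stochastic Hutchinson-style estimator
        self._cache = {}

    def fetch(self, request_key):
        # Identical requests (same node, same granularity, ...) hit the cache.
        if request_key not in self._cache:
            self._cache[request_key] = self._compute(request_key)
        return self._cache[request_key]

calls = []
svc = TinyHessianService(lambda key: calls.append(key) or 0.5)
svc.fetch(('conv1', 'per-tensor'))
svc.fetch(('conv1', 'per-tensor'))
print(len(calls))  # 1 -- the second fetch was served from the cache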
model_compression_toolkit/core/runner.py
CHANGED

@@ -48,6 +48,7 @@ def core_runner(in_model: Any,
                 fw_impl: FrameworkImplementation,
                 tpc: TargetPlatformCapabilities,
                 target_resource_utilization: ResourceUtilization = None,
+                running_gptq: bool = False,
                 tb_w: TensorboardWriter = None):
     """
     Quantize a trained model using post-training quantization.

@@ -97,7 +98,8 @@ def core_runner(in_model: Any,
                 fw_impl,
                 tpc,
                 tb_w,
-                mixed_precision_enable=core_config.mixed_precision_enable)
+                mixed_precision_enable=core_config.mixed_precision_enable,
+                running_gptq=running_gptq)

     hessian_info_service = HessianInfoService(graph=graph,
                                               representative_dataset=representative_data_gen,

@@ -108,7 +110,8 @@ def core_runner(in_model: Any,
                                               core_config=core_config,
                                               fw_info=fw_info,
                                               fw_impl=fw_impl,
-                                              tb_w=tb_w)
+                                              tb_w=tb_w,
+                                              hessian_info_service=hessian_info_service)

     ######################################
     # Finalize bit widths
model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py
CHANGED

@@ -42,8 +42,12 @@ if FOUND_TF:
         """
         weights_quantizers, _ = fw_impl.get_inferable_quantizers(node)
         if len(weights_quantizers) > 0:
+            # for positional weights we need to extract the weight's value.
+            weights_values = {attr: node.get_weights_by_keys(attr)
+                              for attr in weights_quantizers if isinstance(attr, int)}
             return KerasQuantizationWrapper(layer,
-                                            weights_quantizers)
+                                            weights_quantizers,
+                                            weights_values)
         return layer
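Note: the comprehension above separates named weight attributes (string keys such as 'kernel') from positional weights (integer keys, i.e. constant inputs addressed by input index) and passes only the latter's values to the wrapper. A toy version of the selection (hypothetical values, for illustration):

weights_quantizers = {'kernel': 'kernel-quantizer', 1: 'const-input-quantizer'}
node_weights = {'kernel': 'kernel-tensor', 1: 'const-tensor'}

# Only integer keys denote positional constant inputs whose values the
# quantization wrapper must receive explicitly.
weights_values = {attr: node_weights[attr]
                  for attr in weights_quantizers if isinstance(attr, int)}
print(weights_values)  # {1: 'const-tensor'}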
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py
CHANGED

@@ -29,7 +29,7 @@ if FOUND_TORCH:

     def fully_quantized_wrapper(node: common.BaseNode,
                                 module: torch.nn.Module,
-                                fw_impl) -> Union[torch.nn.Module,PytorchQuantizationWrapper]:
+                                fw_impl) -> Union[torch.nn.Module, PytorchQuantizationWrapper]:
         """
         A function which takes a computational graph node and a pytorch module and
         perform the quantization wrapping

@@ -37,20 +37,26 @@ if FOUND_TORCH:
         Args:
             node: A node of mct graph.
             module: A Pytorch module
+            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         Returns: Wrapped layer

         """
         weight_quantizers, _ = fw_impl.get_inferable_quantizers(node)
         if len(weight_quantizers) > 0:
-            return PytorchQuantizationWrapper(module, weight_quantizers)
+            # for positional weights we need to extract the weight's value.
+            weights_values = {attr: fw_impl.to_tensor(node.get_weights_by_keys(attr))
+                              for attr in weight_quantizers if isinstance(attr, int)}
+            return PytorchQuantizationWrapper(module, weight_quantizers, weights_values)
         return module

+
     def get_activation_quantizer_holder(node: BaseNode, fw_impl) -> Callable:
         """
         Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization of a node.
         If the layer is not supposed to be wrapped with an activation quantizer - return None.
         Args:
             node: Node to attach a PytorchActivationQuantizationHolder to its output.
+            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         Returns:
             A PytorchActivationQuantizationHolder module for the node's activation quantization.
         """

@@ -64,6 +70,7 @@ if FOUND_TORCH:
             f'PytorchActivationQuantizationHolder supports a single quantizer but {len(activation_quantizers)} quantizers '
             f'were found for node {node}')

+
     def get_exportable_pytorch_model(graph: Graph):
         """
         Convert graph to fully quantized PyTorch model.
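Note: unlike the Keras path above, each positional weight here goes through fw_impl.to_tensor(...), presumably because the graph stores node weights as numpy arrays while PytorchQuantizationWrapper needs torch tensors. A rough stand-in for that conversion (sketch only; the real to_tensor also handles device placement):

import numpy as np
import torch

def to_tensor_sketch(a):
    # Sketch: turn a numpy weight into a float32 torch tensor.
    return torch.from_numpy(np.asarray(a, dtype=np.float32))

print(to_tensor_sketch(np.ones((2, 2))).dtype)  # torch.float32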
model_compression_toolkit/gptq/pytorch/quantization_facade.py
CHANGED

@@ -180,7 +180,9 @@
                                                     fw_impl=fw_impl,
                                                     tpc=target_platform_capabilities,
                                                     target_resource_utilization=target_resource_utilization,
-                                                    tb_w=tb_w)
+                                                    tb_w=tb_w,
+                                                    running_gptq=True)
+
     float_graph = copy.deepcopy(graph)

     # ---------------------- #
model_compression_toolkit/gptq/runner.py
CHANGED

@@ -111,6 +111,7 @@ def gptq_runner(tg: Graph,
     #############################################
     # Gradient Based Post Training Quantization
     #############################################
+    Logger.info("Running GPTQ optimization.")
     tg_gptq = _apply_gptq(gptq_config,
                           gptq_representative_data_gen,
                           tb_w,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py
CHANGED

@@ -13,12 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 from model_compression_toolkit.constants import FOUND_TF, FOUND_TORCH
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model, generate_tp_model, \
     get_op_quantization_configs
 if FOUND_TF:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import get_keras_tpc as get_keras_tpc_latest
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import generate_keras_tpc
 if FOUND_TORCH:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import get_pytorch_tpc as \
         get_pytorch_tpc_latest
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import generate_pytorch_tpc
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
CHANGED

@@ -56,7 +56,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:

     # We define a default quantization config for all non-specified weights attributes.
     default_weight_attr_config = AttributeQuantizationConfig(
-        weights_quantization_method=tp.QuantizationMethod.
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
         enable_weights_quantization=False,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
CHANGED

@@ -32,7 +32,7 @@ def get_tp_model() -> TargetPlatformModel:
     NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
     (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
     'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
-    This version enables metadata by default
+    This version enables metadata by default.

     Returns: A TargetPlatformModel object.

@@ -44,7 +44,8 @@ def get_tp_model() -> TargetPlatformModel:
                              name='imx500_tp_model')


-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+def get_op_quantization_configs() -> \
+        Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as

@@ -151,6 +152,19 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # this configuration will be used for the operation quantization:
     default_configuration_options = tp.QuantizationConfigOptions([default_config])

+    # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+    # Constant configuration is similar to the default eight bit configuration except for PoT
+    # quantization method for the constant.
+    # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+    # define the desired quantization properties for them.
+    const_config = default_config.clone_and_edit(
+        default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+            enable_weights_quantization=True))
+    if not (const_config.default_weight_attr_config.weights_quantization_method == tp.QuantizationMethod.POWER_OF_TWO and
+            const_config.default_weight_attr_config.weights_per_channel_threshold is False):
+        mct.logger.Logger.error('Constant quantization config should be per-tensor PoT.')
+    const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):

@@ -184,10 +198,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
     any_relu = tp.OperatorsSet("AnyReLU")
-    add = tp.OperatorsSet("Add")
-    sub = tp.OperatorsSet("Sub")
-    mul = tp.OperatorsSet("Mul")
-    div = tp.OperatorsSet("Div")
+    add = tp.OperatorsSet("Add", const_configuration_options)
+    sub = tp.OperatorsSet("Sub", const_configuration_options)
+    mul = tp.OperatorsSet("Mul", const_configuration_options)
+    div = tp.OperatorsSet("Div", const_configuration_options)
     prelu = tp.OperatorsSet("PReLU")
     swish = tp.OperatorsSet("Swish")
     sigmoid = tp.OperatorsSet("Sigmoid")
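Note: the clone_and_edit calls above follow a copy-with-overrides pattern: duplicate the default config and flip only the fields that differ (here, turning weight quantization on for constants while keeping the per-tensor power-of-two method). A standalone sketch of the same pattern with a stand-in dataclass instead of MCT's OpQuantizationConfig:

from dataclasses import dataclass, replace

@dataclass(frozen=True)
class AttrCfg:                      # stand-in for AttributeQuantizationConfig
    method: str = 'POWER_OF_TWO'
    per_channel: bool = False
    enabled: bool = False

@dataclass(frozen=True)
class OpCfg:                        # stand-in for OpQuantizationConfig
    default_weight_attr_config: AttrCfg = AttrCfg()

default_config = OpCfg()
const_config = replace(
    default_config,
    default_weight_attr_config=replace(default_config.default_weight_attr_config,
                                       enabled=True))

# Mirrors the guard above: constants must stay per-tensor power-of-two.
assert const_config.default_weight_attr_config.method == 'POWER_OF_TWO'
assert const_config.default_weight_attr_config.per_channel is False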
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py
CHANGED

@@ -122,7 +122,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
     tp.OperationsSetToLayers("Add", [tf.add, Add])
     tp.OperationsSetToLayers("Sub", [tf.subtract, Subtract])
     tp.OperationsSetToLayers("Mul", [tf.math.multiply, Multiply])
-    tp.OperationsSetToLayers("Div", [tf.math.divide])
+    tp.OperationsSetToLayers("Div", [tf.math.divide, tf.math.truediv])
     tp.OperationsSetToLayers("PReLU", [PReLU])
     tp.OperationsSetToLayers("Swish", [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
     tp.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])
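Note: the reason tf.math.truediv is added next to tf.math.divide is that the Python / operator on TF tensors dispatches to truediv, so a model that divides with / traces to a truediv op the previous "Div" mapping did not cover. A quick check:

import tensorflow as tf

a = tf.constant([4.0])
b = tf.constant([2.0])
# Both forms compute the same division; `/` resolves to truediv under the hood.
print((a / b).numpy(), tf.math.truediv(a, b).numpy())  # [2.] [2.]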
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
CHANGED

@@ -33,7 +33,7 @@ def get_tp_model() -> TargetPlatformModel:
     NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
     (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
     'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
-    This version enables metadata by default
+    This version enables metadata by default.

     Returns: A TargetPlatformModel object.

@@ -45,7 +45,8 @@ def get_tp_model() -> TargetPlatformModel:
                              name='imx500_lut_tp_model')


-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+def get_op_quantization_configs() -> \
+        Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as

@@ -57,13 +58,13 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:

     # We define a default quantization config for all non-specified weights attributes.
     default_weight_attr_config = AttributeQuantizationConfig(
-        weights_quantization_method=tp.QuantizationMethod.
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
         enable_weights_quantization=False,
         lut_values_bitwidth=None)

-    #
+    # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
     kernel_base_config = AttributeQuantizationConfig(
         weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
         weights_n_bits=8,

@@ -150,6 +151,19 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # this configuration will be used for the operation quantization:
     default_configuration_options = tp.QuantizationConfigOptions([default_config])

+    # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+    # Constant configuration is similar to the default eight bit configuration except for PoT
+    # quantization method for the constant.
+    # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+    # define the desired quantization properties for them.
+    const_config = default_config.clone_and_edit(
+        default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+            enable_weights_quantization=True))
+    if not (const_config.default_weight_attr_config.weights_quantization_method == tp.QuantizationMethod.POWER_OF_TWO and
+            const_config.default_weight_attr_config.weights_per_channel_threshold is False):
+        mct.logger.Logger.error('Constant quantization config should be per-tensor PoT.')
+    const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):

@@ -181,10 +195,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
     any_relu = tp.OperatorsSet("AnyReLU")
-    add = tp.OperatorsSet("Add")
-    sub = tp.OperatorsSet("Sub")
-    mul = tp.OperatorsSet("Mul")
-    div = tp.OperatorsSet("Div")
+    add = tp.OperatorsSet("Add", const_configuration_options)
+    sub = tp.OperatorsSet("Sub", const_configuration_options)
+    mul = tp.OperatorsSet("Mul", const_configuration_options)
+    div = tp.OperatorsSet("Div", const_configuration_options)
     prelu = tp.OperatorsSet("PReLU")
     swish = tp.OperatorsSet("Swish")
     sigmoid = tp.OperatorsSet("Sigmoid")
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py
CHANGED

@@ -122,7 +122,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
     tp.OperationsSetToLayers("Add", [tf.add, Add])
     tp.OperationsSetToLayers("Sub", [tf.subtract, Subtract])
     tp.OperationsSetToLayers("Mul", [tf.math.multiply, Multiply])
-    tp.OperationsSetToLayers("Div", [tf.math.divide])
+    tp.OperationsSetToLayers("Div", [tf.math.divide, tf.math.truediv])
     tp.OperationsSetToLayers("PReLU", [PReLU])
     tp.OperationsSetToLayers("Swish", [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
     tp.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])
{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/LICENSE.md
RENAMED
File without changes

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/WHEEL
RENAMED
File without changes

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/top_level.txt
RENAMED
File without changes