mct-nightly 2.4.0.20250924.535__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/METADATA +6 -3
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/RECORD +165 -159
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/analyzer.py +5 -2
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +4 -0
- model_compression_toolkit/core/common/collectors/base_collector.py +1 -4
- model_compression_toolkit/core/common/collectors/mean_collector.py +4 -7
- model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +4 -7
- model_compression_toolkit/core/common/framework_implementation.py +22 -10
- model_compression_toolkit/core/common/framework_info.py +83 -93
- model_compression_toolkit/core/common/fusion/graph_fuser.py +9 -12
- model_compression_toolkit/core/common/graph/base_graph.py +72 -45
- model_compression_toolkit/core/common/graph/base_node.py +141 -121
- model_compression_toolkit/core/common/graph/functional_node.py +2 -19
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +21 -17
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +18 -8
- model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +9 -14
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +21 -12
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +3 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +6 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +10 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +9 -4
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +7 -2
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -7
- model_compression_toolkit/core/common/model_collector.py +18 -22
- model_compression_toolkit/core/common/model_validation.py +44 -0
- model_compression_toolkit/core/common/network_editors/__init__.py +1 -8
- model_compression_toolkit/core/common/network_editors/actions.py +130 -14
- model_compression_toolkit/core/common/network_editors/edit_network.py +4 -1
- model_compression_toolkit/core/common/pruning/channels_grouping.py +5 -1
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +6 -0
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +15 -5
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +7 -3
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +4 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +13 -5
- model_compression_toolkit/core/common/pruning/prune_graph.py +4 -1
- model_compression_toolkit/core/common/pruning/pruner.py +6 -1
- model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +13 -5
- model_compression_toolkit/core/common/pruning/pruning_section.py +18 -9
- model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +55 -116
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +14 -20
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +228 -43
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -0
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -21
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +78 -0
- model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +5 -8
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -91
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +66 -36
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +32 -61
- model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +412 -93
- model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +7 -3
- model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +19 -6
- model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +19 -11
- model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +15 -15
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +20 -4
- model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +9 -4
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +12 -8
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +6 -3
- model_compression_toolkit/core/common/substitutions/scale_equalization.py +21 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +55 -43
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +3 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/visualization/nn_visualizer.py +8 -3
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +12 -8
- model_compression_toolkit/core/graph_prep_runner.py +35 -22
- model_compression_toolkit/core/keras/back2framework/float_model_builder.py +4 -0
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -0
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +15 -8
- model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/keras/default_framework_info.py +91 -131
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +18 -29
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +16 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +5 -4
- model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +13 -3
- model_compression_toolkit/core/keras/keras_implementation.py +37 -17
- model_compression_toolkit/core/keras/keras_model_validation.py +38 -0
- model_compression_toolkit/core/keras/keras_node_prior_info.py +13 -4
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +34 -19
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +12 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +16 -9
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +5 -1
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/pytorch/default_framework_info.py +79 -93
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +4 -3
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +8 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +4 -3
- model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +12 -3
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +41 -24
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +33 -13
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +5 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/quantization_prep_runner.py +11 -6
- model_compression_toolkit/core/runner.py +15 -5
- model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
- model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +1 -0
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +9 -13
- model_compression_toolkit/gptq/common/gptq_graph.py +11 -5
- model_compression_toolkit/gptq/common/gptq_training.py +8 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +9 -3
- model_compression_toolkit/gptq/keras/graph_info.py +6 -4
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -4
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +9 -3
- model_compression_toolkit/gptq/pytorch/graph_info.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +7 -5
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/runner.py +7 -1
- model_compression_toolkit/pruning/keras/pruning_facade.py +12 -7
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +8 -4
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -5
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -4
- model_compression_toolkit/ptq/runner.py +4 -1
- model_compression_toolkit/qat/common/qat_config.py +6 -2
- model_compression_toolkit/qat/keras/quantization_facade.py +13 -7
- model_compression_toolkit/qat/pytorch/quantization_facade.py +11 -7
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py +3 -3
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +2 -0
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +6 -0
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +4 -2
- model_compression_toolkit/xquant/__init__.py +1 -0
- model_compression_toolkit/xquant/common/constants.py +1 -0
- model_compression_toolkit/xquant/common/model_folding_utils.py +6 -1
- model_compression_toolkit/xquant/common/tensorboard_utils.py +4 -1
- model_compression_toolkit/xquant/common/xquant_config.py +27 -1
- model_compression_toolkit/xquant/{common → keras}/core_report_generator.py +2 -2
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +1 -1
- model_compression_toolkit/xquant/{common → keras}/framework_report_utils.py +23 -2
- model_compression_toolkit/xquant/keras/keras_report_utils.py +10 -5
- model_compression_toolkit/xquant/keras/similarity_calculator.py +199 -0
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +3 -0
- model_compression_toolkit/xquant/pytorch/core_detect_degrade_layer.py +77 -0
- model_compression_toolkit/xquant/pytorch/core_judge_troubleshoot.py +66 -0
- model_compression_toolkit/xquant/pytorch/core_report_generator.py +177 -0
- model_compression_toolkit/xquant/pytorch/detect_degrade_utils.py +78 -0
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +41 -1
- model_compression_toolkit/xquant/pytorch/framework_report_utils.py +98 -0
- model_compression_toolkit/xquant/pytorch/judge_troubleshoot_utils.py +562 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +10 -7
- model_compression_toolkit/xquant/{common → pytorch}/similarity_calculator.py +6 -1
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +3 -0
- model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +0 -47
- model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +0 -45
- model_compression_toolkit/quantization_preparation/__init__.py +0 -14
- model_compression_toolkit/quantization_preparation/load_fqc.py +0 -223
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250924.535.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/lut_fake_quant.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/lut_fake_quant.py +0 -0
model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py
CHANGED

@@ -14,6 +14,8 @@
 # ==============================================================================
 import copy
 
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
+from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig
@@ -21,6 +23,7 @@ from model_compression_toolkit.target_platform_capabilities.schema.mct_current_s
 
 
 def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
+                                   core_config: CoreConfig,
                                    fw_impl: FrameworkImplementation) -> Graph:
     """
     Get a graph, where each node has a final weights quantization configuration (with a bias
@@ -28,6 +31,7 @@ def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
 
     Args:
         graph_to_apply_bias_correction: Graph to apply bias correction to.
+        core_config: CoreConfig containing parameters of how the model should be quantized.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
@@ -36,14 +40,21 @@ def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
 
     graph = copy.deepcopy(graph_to_apply_bias_correction)
     for n in graph.nodes:
-
-
-
+        # bias correction is only relevant for nodes with kernel op
+        kernel_attr = graph.fw_info.get_kernel_op_attributes(n.type)[0]
+        if core_config.quantization_config.weights_bias_correction and kernel_attr is not None and \
+                n.is_weights_quantization_enabled(kernel_attr) and \
+                not n.final_weights_quantization_cfg.weights_second_moment_correction:
+            # If a kernel was quantized and weights bias correction is enabled in n.quantization_cfg,
+            # a bias correction term was calculated during model preparation, and is used now in the node's bias term.
+            if n.final_weights_quantization_cfg.weights_bias_correction:
+                _apply_bias_correction_to_node(n, fw_impl, core_config.quantization_config)
     return graph
 
 
 def _apply_bias_correction_to_node(node: BaseNode,
-                                   fw_impl: FrameworkImplementation
+                                   fw_impl: FrameworkImplementation,
+                                   qc: QuantizationConfig):
     """
     Set new bias to node using the bias-correction term that is stored in the
     final weights quantization configuration.
@@ -67,5 +78,7 @@ def _apply_bias_correction_to_node(node: BaseNode,
     node.set_weights_by_keys(fw_impl.constants.BIAS, - correction)
     node.framework_attr[fw_impl.constants.USE_BIAS] = True  # Mark the use_bias attribute of the node.
     node.final_weights_quantization_cfg.set_attr_config(fw_impl.constants.BIAS,
-                                                        WeightsAttrQuantizationConfig(
-
+                                                        WeightsAttrQuantizationConfig(
+                                                            qc,
+                                                            AttributeQuantizationConfig(
+                                                                enable_weights_quantization=False)))
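The new core_config parameter threads the user's quantization settings down to the node loop, which now gates on weights_bias_correction and skips nodes already handled by second-moment correction. The arithmetic behind the stored correction term is worth seeing in isolation. Below is a minimal NumPy sketch of the idea (names, shapes, and the symmetric 8-bit quantizer are illustrative assumptions, not MCT's API): quantizing the kernel shifts the layer's expected output by (Wq - W) @ E[x], and folding that shift into the bias cancels it on average.

    # Minimal NumPy sketch of bias correction (illustrative; not MCT's API).
    import numpy as np

    rng = np.random.default_rng(0)
    W = rng.normal(size=(8, 16))   # float kernel: (out_channels, in_channels)
    b = rng.normal(size=8)         # float bias
    mu_x = rng.normal(size=16)     # per-channel input mean from calibration stats

    # Symmetric 8-bit quantization of the kernel.
    t = np.abs(W).max()
    Wq = np.round(W / t * 127) / 127 * t

    # Quantization shifts the expected output by (Wq - W) @ E[x];
    # subtracting that shift from the bias cancels it on average.
    correction = (Wq - W) @ mu_x
    b_corrected = b - correction

    print(np.allclose(Wq @ mu_x + b_corrected, W @ mu_x + b))  # True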
model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py
CHANGED

@@ -24,7 +24,7 @@ from model_compression_toolkit.core.common.model_builder_mode import ModelBuilde
 from model_compression_toolkit.core.common.model_collector import ModelCollector
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_activations_computation \
-    import
+    import get_activations_qparams
 from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
 from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
@@ -32,6 +32,7 @@ from model_compression_toolkit.core.common.substitutions.apply_substitutions imp
 def _collect_and_assign_act_threshold(graph: Graph,
                                       representative_data_gen: Callable,
                                       core_config: CoreConfig,
+                                      fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation):
     """
     Collect statistics after second moment correction and assign new thresholds to activations.
@@ -40,32 +41,36 @@
        representative_data_gen (Callable): Dataset used for calibration.
        core_config (CoreConfig): Configuration object containing parameters of how the model should be
        quantized, including mixed precision parameters.
+       fw_info: FrameworkInfo object with information about the specific framework's model.
       fw_impl: FrameworkImplementation object with a specific framework methods implementation.
     """
 
     mi = ModelCollector(graph,
                         fw_impl,
-
+                        fw_info,
+                        core_config.quantization_config)  # Mark points for statistics collection
 
     for _data in tqdm(representative_data_gen()):
         mi.infer(_data)
 
-    for n in graph.nodes:
+    for n in list(graph.nodes):
         if n.is_activation_quantization_enabled():
-            activation_params =
-
-
+            activation_params = get_activations_qparams(
+                activation_quant_cfg=n.final_activation_quantization_cfg,
+                nodes_prior_info=n.prior_info,
+                out_stats_container=graph.get_out_stats_collector(n))
             n.final_activation_quantization_cfg.set_activation_quantization_param(activation_params)
 
 
 def quantized_model_builder_for_second_moment_correction(graph: common.Graph,
+                                                         fw_info: FrameworkInfo,
                                                          fw_impl: Any):
     """
     Build a framework model from a graph for second moment correction.
 
     Args:
-        graph: Graph to build from.
+        graph: Graph to build the from.
+        fw_info: FrameworkInfo object with information about the specific framework's model.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
@@ -74,13 +79,15 @@
     quantized_tg = quantize_graph_weights(graph)
 
     quantized_model, user_info = fw_impl.model_builder(quantized_tg,
-                                                       mode=ModelBuilderMode.FLOAT
+                                                       mode=ModelBuilderMode.FLOAT,
+                                                       fw_info=fw_info)
     return quantized_model
 
 
 def apply_second_moment_correction_to_graph(graph: Graph,
                                             representative_data_gen: Callable,
                                             core_config: CoreConfig,
+                                            fw_info: FrameworkInfo,
                                             fw_impl: FrameworkImplementation) -> Graph:
     """
     Apply second moment correction on graph.
@@ -89,14 +96,15 @@
        representative_data_gen (Callable): Dataset used for calibration.
        core_config (CoreConfig): Configuration object containing parameters of how the model should be
        quantized, including mixed precision parameters.
+       fw_info: FrameworkInfo object with information about the specific framework's model.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
        Graph after second moment correction.
     """
-    semi_quantized_model = quantized_model_builder_for_second_moment_correction(graph, fw_impl)
+    semi_quantized_model = quantized_model_builder_for_second_moment_correction(graph, fw_info, fw_impl)
    fw_impl.apply_second_moment_correction(semi_quantized_model, core_config, representative_data_gen, graph)
    graph = substitute(graph, fw_impl.get_substitutions_after_second_moment_correction(core_config.quantization_config))
-    _collect_and_assign_act_threshold(graph, representative_data_gen, core_config, fw_impl)
+    _collect_and_assign_act_threshold(graph, representative_data_gen, core_config, fw_info, fw_impl)
 
    return graph
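_collect_and_assign_act_threshold now receives fw_info and re-collects statistics through ModelCollector before recomputing each activation's quantization parameters via get_activations_qparams. Conceptually the recalibration loop looks like the sketch below, with a stand-in model and a simple power-of-two threshold rule; MCT's actual parameter search is more elaborate.

    # Sketch of the recalibration step: run calibration data through the
    # (weight-quantized) model, track activation statistics, then derive a
    # fresh activation threshold. All names here are assumptions.
    import numpy as np

    def collect_max_abs(model_fn, data_batches):
        # Largest absolute activation value seen during calibration.
        max_abs = 0.0
        for x in data_batches:
            max_abs = max(max_abs, float(np.abs(model_fn(x)).max()))
        return max_abs

    def power_of_two_threshold(max_abs):
        # Smallest power of two covering the observed range.
        return 2.0 ** np.ceil(np.log2(max_abs))

    batches = [np.random.default_rng(i).normal(size=32) for i in range(4)]
    stat = collect_max_abs(lambda x: 3.0 * x, batches)
    print(power_of_two_threshold(stat))  # e.g. 16.0 for stats in (8, 16]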
model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py
CHANGED

@@ -18,7 +18,7 @@ from typing import Any, Callable
 from model_compression_toolkit.core import QuantizationConfig
 from model_compression_toolkit.core.common import BaseNode, Graph
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 
 
 def get_previous_node_with_activation_quantization(linear_node: BaseNode,
@@ -64,11 +64,11 @@ def calculate_bin_centers(bin_edges: np.ndarray) -> np.ndarray:
 
 def compute_activation_bias_correction(graph: Graph,
                                        quant_config: QuantizationConfig,
+                                       fw_info: FrameworkInfo,
                                        fw_impl: FrameworkImplementation,
                                        linear_node: BaseNode,
                                        prev_node: BaseNode,
-                                       kernel_size: str
-                                       get_activation_quantization_fn_factory: Callable) -> Graph:
+                                       kernel_size: str) -> Graph:
     """
     Compute the activation bias correction term, and store it in the final activation
     quantization configuration.
@@ -76,11 +76,11 @@ def compute_activation_bias_correction(graph: Graph,
     Args:
         graph: Graph with nodes to compute the activation bias correction for each node's final activation quantization configuration.
         quant_config: QuantizationConfig of how the model should be quantized.
+        fw_info: Framework info like lists of nodes their kernel should quantized.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
        linear_node: Node to compute the activation bias correction for.
        prev_node: Node to compute the activation error caused by his activation quantization.
        kernel_size: The framework specific attribute name of the convolution layer's kernel size.
-        get_activation_quantization_fn_factory: activation quantization functions factory.
 
     Returns:
        Graph with activation bias correction term for each node.
@@ -107,9 +107,7 @@ def compute_activation_bias_correction(graph: Graph,
     float_centers = calculate_bin_centers(float_bins)
 
     # Quantize the bin edges and calculate the centers of the quantized bins
-
-                                                 get_activation_quantization_fn_factory)
-    quant_bins = activation_quantizer(fw_impl.to_tensor(float_bins))
+    quant_bins = prev_node_act_quant_cfg.quantize_node_output(fw_impl.to_tensor(float_bins))
     quant_bins = fw_impl.to_numpy(quant_bins)
     quant_centers = calculate_bin_centers(quant_bins)
 
@@ -129,18 +127,19 @@ def compute_activation_bias_correction(graph: Graph,
     if normalized_bias < quant_config.activation_bias_correction_threshold:
         return graph
 
-    kernel = linear_node.get_weights_by_keys(linear_node.
+    kernel = linear_node.get_weights_by_keys(fw_info.kernel_ops_attributes_mapping.get(linear_node.type)[0])
 
     # Compute the activation bias correction by applying the quantization error to the kernel, resulting in an output
     # size matching the number of output channels.
     if kernel is not None:
 
         # Get the axes that are not the output channel.
+        output_channel_index, input_channel_index = fw_info.kernel_channels_mapping.get(linear_node.type)
         axis_not_output_channel = list(range(len(kernel.shape)))
-        axis_not_output_channel.remove(
+        axis_not_output_channel.remove(output_channel_index)
 
         # Special case of depthwise_conv2d in tensorflow, where we have a depth multiplier for the filters.
-        if
+        if output_channel_index == input_channel_index:
             axis_not_output_channel.remove(3)  # 3 is the depth multiplier index.
 
         activation_bias_correction_term = mean_diff * np.sum(kernel, axis=tuple(axis_not_output_channel))
@@ -151,20 +150,21 @@ def compute_activation_bias_correction(graph: Graph,
 
 def compute_activation_bias_correction_of_graph(graph: Graph,
                                                 quant_config: QuantizationConfig,
+                                                fw_info: FrameworkInfo,
                                                 fw_impl: FrameworkImplementation,
                                                 activation_bias_correction_node_matchers: Callable,
-                                                kernel_size: str
-                                                get_activation_quantization_fn_factory: Callable) -> Graph:
+                                                kernel_size: str) -> Graph:
     """
     Compute the activation bias correction term for the graph.
 
     Args:
        graph: Graph with nodes to compute the activation bias correction.
        quant_config: QuantizationConfig of how the model should be quantized.
+       fw_info: Framework info like lists of nodes their kernel should quantized.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
        activation_bias_correction_node_matchers: Function to match the layers for activation bias correction.
        kernel_size: The framework specific attribute name of the convolution layer's kernel size.
-
+
 
     Returns:
        Graph with activation bias correction term for each relevant node.
@@ -177,9 +177,9 @@ def compute_activation_bias_correction_of_graph(graph: Graph,
     if prev_node is not None:
         graph = compute_activation_bias_correction(graph=graph,
                                                    quant_config=quant_config,
+                                                   fw_info=fw_info,
                                                    fw_impl=fw_impl,
                                                    linear_node=n,
                                                    prev_node=prev_node,
-                                                   kernel_size=kernel_size
-                                                   get_activation_quantization_fn_factory=get_activation_quantization_fn_factory)
+                                                   kernel_size=kernel_size)
     return graph
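The hunks above replace the quantizer-factory plumbing with a direct call to prev_node_act_quant_cfg.quantize_node_output and pull the kernel axes from fw_info. The underlying computation compares histogram-bin centers before and after activation quantization and pushes the mean shift through the kernel. A self-contained NumPy sketch, where the uniform 8-bit quantizer and the dense-kernel shape are assumptions:

    # Illustrative activation-bias-correction computation (not MCT's API).
    import numpy as np

    def calculate_bin_centers(bin_edges):
        return (bin_edges[:-1] + bin_edges[1:]) / 2.0

    rng = np.random.default_rng(1)
    acts = np.abs(rng.normal(size=10_000))           # previous node's activations
    counts, float_bins = np.histogram(acts, bins=64)

    # Quantize the bin edges with a simple 8-bit uniform quantizer (a stand-in
    # for the previous node's activation quantizer).
    t = float_bins.max()
    quant_bins = np.round(float_bins / t * 255) / 255 * t

    float_centers = calculate_bin_centers(float_bins)
    quant_centers = calculate_bin_centers(quant_bins)

    # Probability-weighted mean shift introduced by activation quantization.
    p = counts / counts.sum()
    mean_diff = np.sum(p * (quant_centers - float_centers))

    # Fold the shift through a dense kernel: sum over all axes except out-channels.
    kernel = rng.normal(size=(16, 8))                # (in_channels, out_channels)
    bias_correction = mean_diff * kernel.sum(axis=0) # one term per output channel
    print(bias_correction.shape)                     # (8,)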
model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py
CHANGED

@@ -18,6 +18,7 @@ from typing import Any
 import numpy as np
 
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common import BaseNode, Graph
 from model_compression_toolkit.core.common.quantization.quantize_node import get_quantized_weights_attr_by_qc
 from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
@@ -25,6 +26,7 @@ from model_compression_toolkit.logger import Logger
 
 
 def compute_bias_correction_of_graph(graph: Graph,
+                                     fw_info: FrameworkInfo,
                                      fw_impl: FrameworkImplementation) -> Graph:
     """
     For each node in a graph, and for each candidate weights quantization configuration,
@@ -33,6 +35,7 @@ def compute_bias_correction_of_graph(graph: Graph,
     Args:
         graph: Graph with nodes to compute the bias correction for
         each node's weights quantization configuration candidates.
+        fw_info: Framework info like lists of nodes their kernel should quantized.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
@@ -43,14 +46,25 @@ def compute_bias_correction_of_graph(graph: Graph,
     for n in graph.nodes:
         # Bias correction is computed based on the quantized kernel, so we need to get the specific kernel attribute
         # name out of all the weights attributes of the node.
-        if
-
-
+        if fw_info.is_kernel_op(n.type):
+            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
+            if n.is_weights_quantization_enabled(kernel_attr):
+                # Bias correction is not applied to layers with constant inputs.
+                if n.has_positional_weights:
+                    for candidate_qc in n.candidates_quantization_cfg:
+                        candidate_qc.weights_quantization_cfg.weights_bias_correction = False
+                else:
+                    _compute_bias_correction_per_candidate_qc(n,
+                                                              kernel_attr,
+                                                              fw_info,
+                                                              graph.get_in_stats_collector(n),
+                                                              fw_impl=fw_impl)
     return graph
 
 
 def _compute_bias_correction_per_candidate_qc(node: BaseNode,
                                               kernel_attr: str,
+                                              fw_info: FrameworkInfo,
                                               node_in_stats_collector: BaseStatsCollector,
                                               fw_impl: FrameworkImplementation):
     """
@@ -60,13 +74,15 @@ def _compute_bias_correction_per_candidate_qc(node: BaseNode,
     Args:
         node: Node to compute the bias correction for its different candidates.
         kernel_attr: The name of the kernel attribute of the node.
+        fw_info: Framework info like lists of nodes their kernel should quantized.
        node_in_stats_collector: Statistics collector of the node for the mean per-channel.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     """
 
     for candidate_qc in node.candidates_quantization_cfg:
-        if
+        if candidate_qc.weights_quantization_cfg.weights_bias_correction and not \
+                candidate_qc.weights_quantization_cfg.weights_second_moment_correction:
 
             quantized_kernel, io_channels_axes = get_quantized_weights_attr_by_qc(kernel_attr,
                                                                                   node,
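For each candidate configuration that passes the gate above, the correction is derived from the quantized kernel and the collected per-input-channel means. A hedged sketch of that per-output-channel computation for a conv kernel (the HWIO layout and the quantizer are assumptions; MCT reads the real channel axes from fw_info.kernel_channels_mapping):

    # Per-channel bias correction for a conv kernel (illustrative shapes).
    import numpy as np

    rng = np.random.default_rng(2)
    W = rng.normal(size=(3, 3, 16, 8))    # conv kernel, HWIO layout
    mu_x = rng.normal(size=16)            # per-input-channel mean from stats collector

    t = np.abs(W).max()
    Wq = np.round(W / t * 127) / 127 * t  # quantized kernel for this candidate

    # Contract the spatial dims of the quantization error, then the input
    # channels against the per-channel mean, leaving one term per out-channel.
    eps = Wq - W
    correction = np.einsum('io,i->o', eps.sum(axis=(0, 1)), mu_x)
    print(correction.shape)               # (8,)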
model_compression_toolkit/core/common/statistics_correction/statistics_correction.py
CHANGED

@@ -32,6 +32,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
 
 def statistics_correction_runner(transformed_graph: Graph,
                                  core_config: CoreConfig,
+                                 fw_info: FrameworkInfo,
                                  fw_impl: FrameworkImplementation,
                                  tb_w: TensorboardWriter = None, ) -> Graph:
     """
@@ -40,6 +41,7 @@ def statistics_correction_runner(transformed_graph: Graph,
        transformed_graph: Graph to add statistics correction.
        core_config (CoreConfig): Configuration object containing parameters of how the model should be
        quantized, including mixed precision parameters.
+       fw_info: FrameworkInfo object with information about the specific framework's model.
       fw_impl: FrameworkImplementation object with a specific framework methods implementation.
       tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
 
@@ -56,9 +58,9 @@ def statistics_correction_runner(transformed_graph: Graph,
     ########################################################
     # Compute bias correction to nodes' config candidates
     ########################################################
-
-
-
+    tg_with_bias = compute_bias_correction_of_graph(tg_with_bias,
+                                                    fw_info,
+                                                    fw_impl)
 
     if tb_w is not None:
         tb_w.add_graph(tg_with_bias, 'statistics_computation')
@@ -69,6 +71,7 @@ def statistics_correction_runner(transformed_graph: Graph,
 def apply_statistics_correction(transformed_graph: Graph,
                                 representative_data_gen: Callable,
                                 core_config: CoreConfig,
+                                fw_info: FrameworkInfo,
                                 fw_impl: FrameworkImplementation,
                                 tb_w: TensorboardWriter = None, ) -> Graph:
     """
@@ -78,6 +81,7 @@ def apply_statistics_correction(transformed_graph: Graph,
        representative_data_gen (Callable): Dataset used for calibration.
        core_config (CoreConfig): Configuration object containing parameters of how the model should be
        quantized, including mixed precision parameters.
+       fw_info: FrameworkInfo object with information about the specific framework's model.
       fw_impl: FrameworkImplementation object with a specific framework methods implementation.
       tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
 
@@ -90,13 +94,14 @@ def apply_statistics_correction(transformed_graph: Graph,
     #############################################
     if core_config.quantization_config.weights_second_moment_correction:
         transformed_graph = apply_second_moment_correction_to_graph(transformed_graph, representative_data_gen,
-                                                                    core_config, fw_impl)
+                                                                    core_config, fw_info, fw_impl)
 
     #############################################
     # Apply Bias Correction
     #############################################
     if core_config.quantization_config.weights_bias_correction:
         transformed_graph = apply_bias_correction_to_graph(transformed_graph,
+                                                           core_config,
                                                            fw_impl=fw_impl)
     if tb_w is not None:
         tb_w.add_graph(transformed_graph, 'after_statistics_correction')
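The runner fixes the order of the two corrections: second-moment correction, which rebuilds activation statistics, must run before bias correction is applied. Reduced to a toy sketch with placeholder transforms (the config flag names mirror the diff; everything else is hypothetical):

    # Orchestration order of apply_statistics_correction, as a sketch.
    from dataclasses import dataclass

    @dataclass
    class QuantCfg:
        weights_second_moment_correction: bool = True
        weights_bias_correction: bool = True

    def apply_statistics_correction(graph, cfg: QuantCfg):
        if cfg.weights_second_moment_correction:
            graph = graph + ['second_moment_correction']  # placeholder transform
        if cfg.weights_bias_correction:
            graph = graph + ['bias_correction']
        return graph

    print(apply_statistics_correction([], QuantCfg()))
    # ['second_moment_correction', 'bias_correction']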
model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py
CHANGED

@@ -20,6 +20,7 @@ from typing import Callable
 import numpy as np
 
 from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig, \
     ActivationQuantizationMode
@@ -83,28 +84,30 @@ class BatchNormalizationReconstruction(common.BaseSubstitution):
         # If the linear operator is part of a reused group (it is the "base" node, or a reused node),
         # we should skip the substitution.
         if source_node.is_reused():
+            for qc in source_node.candidates_quantization_cfg:
+                qc.weights_quantization_cfg.weights_second_moment_correction = False
             return graph
 
         # We apply only on nodes with folded BatchNormalization.
         if source_node.prior_info.std_output is None or source_node.prior_info.mean_output is None:
+            for qc in source_node.candidates_quantization_cfg:
+                qc.weights_quantization_cfg.weights_second_moment_correction = False
             return graph
 
         # This feature disabled for models with weights quantization method of Power of 2
         for qc in source_node.candidates_quantization_cfg:
             # this feature is relevant only for layers with kernel op
-
+            kernel_attr = graph.fw_info.get_kernel_op_attributes(source_node.type)
+            if kernel_attr is None:
                 Logger.error(f"Can't preform BatchNorm reconstruction on a node {source_node.name} without a kernel op.")
-            if (qc.weights_quantization_cfg.get_attr_config(
+            if (qc.weights_quantization_cfg.get_attr_config(kernel_attr[0]).weights_quantization_method
                     == QuantizationMethod.POWER_OF_TWO):
                 Logger.warning("Second moment statistics correction feature disabled for models with weights "
                                "quantization method of Power of 2")
+                for qc_inner in source_node.candidates_quantization_cfg:
+                    qc_inner.weights_quantization_cfg.weights_second_moment_correction = False
                 return graph
 
-        # turn on second moment correction flag
-        def set_second_moment_correction(qc):
-            qc.weights_quantization_cfg.weights_second_moment_correction = True
-        source_node.quantization_cfg.update_all(set_second_moment_correction)
-
         eps = self.epsilon_val
 
         original_gamma = source_node.prior_info.std_output
@@ -122,7 +125,7 @@ class BatchNormalizationReconstruction(common.BaseSubstitution):
 
         bn_node.prior_info = copy.deepcopy(source_node.prior_info)
 
-        bn_node.
+        bn_node.candidates_quantization_cfg = copy.deepcopy(source_node.candidates_quantization_cfg)
 
         for qc in bn_node.candidates_quantization_cfg:
             qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.NO_QUANT
@@ -137,6 +140,7 @@ class BatchNormalizationReconstruction(common.BaseSubstitution):
             # reconstructed node BN attributes need to be quantized and how.
             qc.weights_quantization_cfg.set_attr_config(attr,
                                                         WeightsAttrQuantizationConfig(
+                                                            QuantizationConfig(),
                                                             AttributeQuantizationConfig(
                                                                 enable_weights_quantization=False)))
 
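Numerically, "reconstructing" the folded BatchNorm means building a BN layer from the node's prior output statistics so that it computes the identity; second-moment correction can then re-estimate its statistics. A NumPy sketch of that identity construction (the gamma = std, beta = mean choice follows the original_gamma line visible in the diff; eps and shapes are assumptions):

    # Identity BatchNorm reconstructed from prior output statistics (sketch).
    import numpy as np

    rng = np.random.default_rng(3)
    mean_output = rng.normal(size=8)             # source_node.prior_info.mean_output
    std_output = np.abs(rng.normal(size=8)) + 1  # source_node.prior_info.std_output
    eps = 1e-5

    # Normalize with (mean, std^2 - eps), rescale with gamma = std, beta = mean.
    gamma, beta = std_output, mean_output
    moving_mean, moving_var = mean_output, std_output ** 2 - eps

    x = rng.normal(loc=mean_output, scale=std_output, size=(32, 8))
    bn = gamma * (x - moving_mean) / np.sqrt(moving_var + eps) + beta
    print(np.allclose(bn, x))  # True: the reconstructed BN is an identity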
model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py
CHANGED

@@ -157,7 +157,7 @@ class BatchNormalizationRefusing(common.BaseSubstitution):
         graph.remove_node(bn_node)
         graph.remove_node(source_node)
 
-        self._calc_weights_quantization_params(conv_bn, weights_scale)
+        self._calc_weights_quantization_params(conv_bn, weights_scale, graph.fw_info)
 
         assert num_nodes_before_substitution - len(graph.nodes) == 1
         assert num_edges_before_substitution - len(graph.edges) == 1
@@ -165,15 +165,18 @@ class BatchNormalizationRefusing(common.BaseSubstitution):
 
     def _calc_weights_quantization_params(self,
                                           conv_bn: BaseNode,
-                                          weights_scale: np.ndarray
+                                          weights_scale: np.ndarray,
+                                          fw_info):
         """
         Update node weights quantization params.
         Args:
             conv_bn: Convolution node to update the weights quantization params.
            weights_scale: Weight scale factor in which to multiply the conv node's weight.
+            fw_info: FrameworkInfo object with information about the specific framework's model
         """
         # Conv layer is ensured to have a kernel attribute
-
+        kernel_attr = fw_info.get_kernel_op_attributes(conv_bn.type)[0]
+        conv_bn_kernel_cfg = conv_bn.final_weights_quantization_cfg.get_attr_config(kernel_attr)
         # In case of SYMMETRIC weight quantization method, we update the threshold by weights_scale
         if conv_bn_kernel_cfg.weights_quantization_method == QuantizationMethod.SYMMETRIC:
             original_threshold = conv_bn_kernel_cfg.weights_quantization_params[THRESHOLD]
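When the refused conv kernel is multiplied by weights_scale, a SYMMETRIC threshold stored in the final weights quantization config must scale by the same factor, or the quantization grid no longer covers the weights. A sketch (scalar scale for brevity; in the code above weights_scale is an array):

    # Why the SYMMETRIC threshold scales with the kernel (sketch).
    import numpy as np

    w = np.array([-0.8, 0.3, 0.6])
    weights_scale = 2.5
    threshold = np.abs(w).max()

    w_refused = w * weights_scale
    new_threshold = threshold * weights_scale  # keeps |w| / threshold invariant

    print(np.abs(w_refused).max() <= new_threshold)  # True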
model_compression_toolkit/core/common/substitutions/scale_equalization.py
CHANGED

@@ -20,6 +20,8 @@ import scipy
 
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
 
 
@@ -75,6 +77,7 @@ def fixed_second_moment_after_relu(mu: np.ndarray,
 
 def scale_reshaping(scale: np.ndarray,
                     op2d: common.BaseNode,
+                    kernel_channel_mapping: DefaultDict,
                     kernel_str: str,
                     in_channels: bool = True) -> np.ndarray:
     """
@@ -86,6 +89,7 @@ def scale_reshaping(scale: np.ndarray,
     Args:
         scale: Scale factor to scale the kernel channels by.
         op2d: Node to scale its kernel.
+        kernel_channel_mapping: Mapping from a layer to a tuple of indices of its output/input kernel channels.
        kernel_str: The framework specific attribute name of the convolution layer's weight/kernel.
        in_channels: Kernel's index of input channels.
 
@@ -95,11 +99,12 @@ def scale_reshaping(scale: np.ndarray,
 
     op_ndims = op2d.get_weights_by_keys(kernel_str).ndim
     reshape_target = np.ones(op_ndims, dtype=np.int32)
-    reshape_target[op2d.
+    reshape_target[kernel_channel_mapping.get(op2d.type)[int(in_channels)]] = -1
     return np.reshape(scale, reshape_target)
 
 
-def update_linear_nodes(
+def update_linear_nodes(fw_info: FrameworkInfo,
+                        first_op2d_node: BaseNode,
                         second_op2d_node: BaseNode,
                         scale_factor: np.ndarray,
                         kernel_str: str,
@@ -111,6 +116,7 @@ def update_linear_nodes(first_op2d_node: BaseNode,
     The scale factor contain a scale value per-channel.
 
     Args:
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
        groups of layers by how they should be quantized, etc.)
        first_op2d_node: Node to multiply its kernel by the scale factor.
        second_op2d_node: Node to divide its kernel by the scale factor.
@@ -119,12 +125,15 @@ def update_linear_nodes(first_op2d_node: BaseNode,
     kernel_str: The framework specific attribute name of the convolution layer's weight/kernel.
 
     """
+
     w2_fixed = second_op2d_node.get_weights_by_keys(kernel_str) / scale_reshaping(scale_factor,
                                                                                   second_op2d_node,
+                                                                                  fw_info.kernel_channels_mapping,
                                                                                   kernel_str)
 
     w1_fixed = first_op2d_node.get_weights_by_keys(kernel_str) * scale_reshaping(scale_factor,
                                                                                  first_op2d_node,
+                                                                                 fw_info.kernel_channels_mapping,
                                                                                  kernel_str,
                                                                                  in_channels=False)
 
@@ -159,7 +168,8 @@ def calculate_scale_correction(first_op2d_node: BaseNode) -> tuple:
     return scale_factor
 
 
-def scale_equalization_lnl(
+def scale_equalization_lnl(fw_info: FrameworkInfo,
+                           first_op2d_node: BaseNode,
                            second_op2d_node: BaseNode,
                            kernel_str: str,
                            bias_str: str):
@@ -169,6 +179,7 @@ def scale_equalization_lnl(first_op2d_node: BaseNode,
     follows the activation node to get the same expected output without the scaling.
 
     Args:
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
        groups of layers by how they should be quantized, etc.)
        first_op2d_node: Node to multiply its kernel by the scale factor.
        second_op2d_node: Node to divide its kernel by the scale factor.
@@ -178,7 +189,8 @@ def scale_equalization_lnl(first_op2d_node: BaseNode,
     """
     scale_factor = calculate_scale_correction(first_op2d_node)
 
-    update_linear_nodes(
+    update_linear_nodes(fw_info,
+                        first_op2d_node,
                         second_op2d_node,
                         scale_factor,
                         kernel_str,
@@ -194,6 +206,7 @@ class BaseScaleEqualization(common.BaseSubstitution):
 
     def __init__(self,
                  quant_config: QuantizationConfig,
+                 fw_info: FrameworkInfo,
                  matcher_instance,
                  kernel_str: str,
                  bias_str: str):
@@ -201,11 +214,13 @@ class BaseScaleEqualization(common.BaseSubstitution):
         Initialize a ScaleEqualization object.
         Args:
             quant_config: QuantizationConfig containing parameters of how the model should be quantized.
+            fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
            groups of layers by how they should be quantized, etc.)
            matcher_instance: Per substitution matcher instance of type WalkMatcher
         """
 
         self.quant_config = quant_config
+        self.fw_info = fw_info
         self.kernel_str = kernel_str
         self.bias_str = bias_str
         super().__init__(matcher_instance=matcher_instance)
@@ -228,7 +243,8 @@ class BaseScaleEqualization(common.BaseSubstitution):
         act_node = nodes_list[1]
         second_op2d_node = nodes_list[-1]
         if first_op2d_node.prior_info.std_output is not None and act_node.is_activation_quantization_enabled():
-            scale_equalization_lnl(
+            scale_equalization_lnl(self.fw_info,
+                                   first_op2d_node,
                                    second_op2d_node,
                                    self.kernel_str,
                                    self.bias_str)
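The reason update_linear_nodes can rescale two back-to-back linear layers without changing the network: the activation between them (e.g., ReLU) is positively homogeneous, so a positive per-channel factor applied to the first layer's output channels and divided out of the second layer's input channels cancels exactly. A NumPy sketch with dense layers (the scale factor here is random for brevity; calculate_scale_correction derives it from prior output statistics):

    # Scale-equalization invariance check (illustrative, dense layers).
    import numpy as np

    rng = np.random.default_rng(4)
    W1, b1 = rng.normal(size=(8, 16)), rng.normal(size=8)
    W2 = rng.normal(size=(4, 8))
    x = rng.normal(size=16)

    scale = rng.uniform(0.5, 2.0, size=8)        # positive per-channel factors

    def net(W1, b1, W2):
        return W2 @ np.maximum(W1 @ x + b1, 0)   # linear -> ReLU -> linear

    W1_eq = W1 * scale[:, None]                  # scale output channels of layer 1
    b1_eq = b1 * scale                           # bias scales with its channel
    W2_eq = W2 / scale[None, :]                  # undo on layer 2 input channels

    print(np.allclose(net(W1, b1, W2), net(W1_eq, b1_eq, W2_eq)))  # True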