PyPI - mct-nightly - Versions diffs - 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py CHANGED Viewed

@@ -19,6 +19,7 @@ from model_compression_toolkit.core.common.pruning.pruning_framework_implementat
     PruningFrameworkImplementation
 from model_compression_toolkit.core.common.pruning.pruning_section import PruningSection
 from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.pytorch.constants import BIAS, GROUPS, OUT_CHANNELS, OUT_FEATURES, NUM_FEATURES, \
     IN_CHANNELS, IN_FEATURES, NUM_PARAMETERS
@@ -29,10 +30,6 @@ import numpy as np
 from model_compression_toolkit.logger import Logger
-# default output channel axis to use when it's not defined in node's fw_info.
-_default_output_channel_axis = 1
 class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplementation):
     """
     Implementation of the PruningFramework for the Pytorch framework. This class provides
@@ -42,23 +39,27 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
     def prune_entry_node(self,
                          node: BaseNode,
-                         output_mask: np.ndarray):
+                         output_mask: np.ndarray,
+                         fw_info: FrameworkInfo):
         """
         Prunes the entry node of a model in Pytorch.
         Args:
             node (BaseNode): The entry node to be pruned.
             output_mask (np.ndarray): A numpy array representing the mask to be applied to the output channels.
+            fw_info (FrameworkInfo): Framework-specific information object.
         """
         return _prune_pytorch_edge_node(node=node,
                                         mask=output_mask,
+                                        fw_info=fw_info,
                                         is_exit_node=False)
     def prune_intermediate_node(self,
                                 node: BaseNode,
                                 input_mask: np.ndarray,
-                                output_mask: np.ndarray):
+                                output_mask: np.ndarray,
+                                fw_info: FrameworkInfo):
         """
         Prunes an intermediate node in a Pytorch model.
@@ -66,11 +67,12 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
             node (BaseNode): The intermediate node to be pruned.
             input_mask (np.ndarray): A numpy array representing the mask to be applied to the input channels.
             output_mask (np.ndarray): A numpy array representing the mask to be applied to the output channels.
+            fw_info (FrameworkInfo): Framework-specific information object.
         """
         # TODO (reuvenp/liord): Address handling of node parameters that can be either a single value across all channels or distinct per channel, e.g., PReLU. Consider developing a structured approach.
         pruning_en = True
-        _edit_node_input_shape(node, input_mask)
+        _edit_node_input_shape(node, input_mask, fw_info)
         pruned_parameters = {}
         mask_bool = output_mask.astype(bool)
         node.weights = pruned_parameters
@@ -89,17 +91,20 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
     def prune_exit_node(self,
                         node: BaseNode,
-                        input_mask: np.ndarray):
+                        input_mask: np.ndarray,
+                        fw_info: FrameworkInfo):
         """
         Prunes the exit node of a model in Pytorch.
         Args:
             node (BaseNode): The exit node to be pruned.
             input_mask (np.ndarray): A numpy array representing the mask to be applied to the input channels.
+            fw_info (FrameworkInfo): Framework-specific information object.
         """
         return _prune_pytorch_edge_node(node=node,
                                         mask=input_mask,
+                                        fw_info=fw_info,
                                         is_exit_node=True)
     def is_node_entry_node(self, node: BaseNode) -> bool:
@@ -116,19 +121,22 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
     def is_node_exit_node(self,
                           node: BaseNode,
-                          corresponding_entry_node: BaseNode) -> bool:
+                          corresponding_entry_node: BaseNode,
+                          fw_info: FrameworkInfo) -> bool:
         """
         Determines whether a node is an exit node in a Pytorch model.
         Args:
             node (BaseNode): The node to be checked.
             corresponding_entry_node (BaseNode): The entry node of the pruning section that is checked.
+            fw_info (FrameworkInfo): Framework-specific information object.
         Returns:
             bool: Boolean indicating if the node is an exit node.
         """
         return _is_pytorch_node_pruning_section_edge(node) and PruningSection.has_matching_channel_count(node,
-                                                                                                         corresponding_entry_node)
+                                                                                                         corresponding_entry_node,
+                                                                                                         fw_info)
     def is_node_intermediate_pruning_section(self, node: BaseNode) -> bool:
         """
@@ -147,7 +155,8 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
                                  torch.nn.Linear]
     def attrs_oi_channels_info_for_pruning(self,
-                                           node: BaseNode) -> Dict[str, Tuple[int, int]]:
+                                           node: BaseNode,
+                                           fw_info: FrameworkInfo) -> Dict[str, Tuple[int, int]]:
         """
         Retrieves the attributes of a given node along with the output/input (OI) channel axis
         for each attribute used to prune these attributes.
@@ -164,6 +173,7 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
         Args:
             node (BaseNode): The node from the computational graph.
+            fw_info (FrameworkInfo): Contains framework-specific information and utilities.
         Returns:
             Dict[str, Tuple[int, int]]: A dictionary where each key is an attribute name (like 'weight' or 'bias')
@@ -171,8 +181,13 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
         """
         attributes_with_axis = {}
-        if node.kernel_attr:
-            attributes_with_axis[node.kernel_attr] = (node.channel_axis.output, node.channel_axis.input)
+        if fw_info.is_kernel_op(node.type):
+            kernel_attributes = fw_info.get_kernel_op_attributes(node.type)
+            if kernel_attributes is None or len(kernel_attributes) == 0:
+                Logger.critical(f"Expected to find kernel attributes but none were identified for node '{node.name}' of type {node.type}.")
+            for attr in kernel_attributes:
+                attributes_with_axis[attr] = fw_info.kernel_channels_mapping.get(node.type)
             # Bias is a vector at the length of the number of output channels.
             # For this reason, input channel axis is irrelevant to the bias attribute.
@@ -187,17 +202,13 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
                 # If the number of float parameters is 1 or less - is the case where
                 # we have one parameter for all channels. For this case, we don't
                 # want to prune the parameter.
-                if node.get_num_parameters()[1] <= 1:
+                if node.get_num_parameters(fw_info)[1] <= 1:
                     attributes_with_axis[attr] = (None, None)
                 else:
                     attributes_with_axis[attr] = (-1, None)
         return attributes_with_axis
-    @property
-    def default_output_channel_axis(self):
-        return _default_output_channel_axis
 def _is_pytorch_node_pruning_section_edge(node: BaseNode) -> bool:
     """
@@ -223,6 +234,7 @@ def _is_pytorch_node_pruning_section_edge(node: BaseNode) -> bool:
 def _prune_pytorch_edge_node(node: BaseNode,
                              mask: np.ndarray,
+                             fw_info: FrameworkInfo,
                              is_exit_node: bool):
     """
     Prunes the given Pytorch node by applying the mask to the node's weights (weights and biases).
@@ -231,18 +243,21 @@ def _prune_pytorch_edge_node(node: BaseNode,
     Args:
         node (BaseNode): The node to be pruned.
         mask (np.ndarray): The pruning mask to be applied.
+        fw_info (FrameworkInfo): Framework-specific information object.
         is_exit_node (bool): A boolean indicating whether the node is an exit node.
     """
     # Retrieve the kernel attribute and the axes to prune.
-    axis_to_prune = node.channel_axis.input if is_exit_node else node.channel_axis.output
-    kernel = node.get_weights_by_keys(node.kernel_attr)
+    kernel_attr = fw_info.get_kernel_op_attributes(node.type)[0]
+    io_axis = fw_info.kernel_channels_mapping.get(node.type)
+    axis_to_prune = io_axis[int(is_exit_node)]
+    kernel = node.get_weights_by_keys(kernel_attr)
     # Convert mask to boolean.
     mask_bool = mask.astype(bool)
     pruned_kernel = kernel.compress(mask_bool, axis=axis_to_prune)
-    node.set_weights_by_keys(name=node.kernel_attr, tensor=pruned_kernel)
+    node.set_weights_by_keys(name=kernel_attr, tensor=pruned_kernel)
     if not is_exit_node and node.framework_attr[BIAS]:
         # Prune the bias if applicable and it's an entry node.
@@ -270,11 +285,12 @@ def _prune_pytorch_edge_node(node: BaseNode,
             Logger.critical(f"{node.type} is currently not supported"
                              f"as an edge node in a pruning section")
         # Adjust the input shape for the last node in the section.
-        _edit_node_input_shape(node, mask_bool)
+        _edit_node_input_shape(node, mask_bool, fw_info)
 def _edit_node_input_shape(node: BaseNode,
-                           input_mask: np.ndarray):
+                           input_mask: np.ndarray,
+                           fw_info: FrameworkInfo):
     """
     Adjusts the input shape of a node based on the given input mask.
@@ -285,13 +301,14 @@ def _edit_node_input_shape(node: BaseNode,
     Args:
         node (BaseNode): The node whose input shape needs to be adjusted.
         input_mask (np.ndarray): A binary array where 1 indicates the channel is kept and 0 means pruned.
+        fw_info (FrameworkInfo): Framework-specific information object.
     """
     # Start with the current input shape of the node.
     new_input_shape = list(node.input_shape)
     # Adjust the last dimension of the shape to match the number of unpruned (retained) channels.
     # This is done by summing the mask, as each '1' in the mask represents a retained channel.
-    channel_axis = _default_output_channel_axis if node.out_channel_axis is None else node.out_channel_axis
+    channel_axis = fw_info.out_channel_axis_mapping.get(node.type)
     new_input_shape[0][channel_axis] = int(np.sum(input_mask))
     # Update the node's input shape with the new dimensions.

model_compression_toolkit/core/pytorch/pytorch_implementation.py CHANGED Viewed

@@ -26,7 +26,7 @@ from torch.nn import Module, Sigmoid, Softmax
 import model_compression_toolkit.core.pytorch.constants as pytorch_constants
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
-from model_compression_toolkit.core import QuantizationConfig, CoreConfig
+from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -37,6 +37,7 @@ from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.similarity_analyzer import compute_mse, compute_kl_divergence, compute_cs
 from model_compression_toolkit.core.pytorch.back2framework import get_pytorch_model_builder
 from model_compression_toolkit.core.pytorch.data_util import data_gen_to_dataloader
+from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.batchnorm_folding import \
     pytorch_batchnorm_folding, pytorch_batchnorm_forward_folding
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.batchnorm_reconstruction import \
@@ -177,6 +178,7 @@ class PytorchImplementation(FrameworkImplementation):
                       graph: Graph,
                       mode: ModelBuilderMode,
                       append2output: List[Any] = None,
+                      fw_info: FrameworkInfo = DEFAULT_PYTORCH_INFO,
                       return_float_outputs: bool = False) -> Tuple:
         """
         Build a Pytorch module from a graph.
@@ -187,6 +189,7 @@ class PytorchImplementation(FrameworkImplementation):
             graph: Graph to build the module from it.
             mode: Mode for how to build the module.
             append2output: List of Nodes to set as the module's outputs.
+            fw_info: FrameworkInfo object with information about the specific framework's module
             return_float_outputs (bool): whether to return outputs before or after quantization nodes (default)
         Returns:
@@ -195,6 +198,7 @@ class PytorchImplementation(FrameworkImplementation):
         pytorch_model_builder = get_pytorch_model_builder(mode)
         return pytorch_model_builder(graph=graph,
                                      append2output=append2output,
+                                     fw_info=fw_info,
                                      return_float_outputs=return_float_outputs).build_model()
     def run_model_inference(self,
@@ -228,55 +232,63 @@ class PytorchImplementation(FrameworkImplementation):
     def shift_negative_correction(self,
                                   graph: Graph,
-                                  core_config: CoreConfig) -> Graph:
+                                  core_config: CoreConfig,
+                                  fw_info: FrameworkInfo) -> Graph:
         """
         Apply shift negative correction (SNC) on a graph.
         Args:
             graph: Graph to apply SNC on.
             core_config: Quantization configuration.
+            fw_info: FrameworkInfo object with information about the specific framework's module.
         Returns:
             Graph after SNC.
         """
         return pytorch_apply_shift_negative_correction(graph,
-                                                       core_config)
+                                                       core_config,
+                                                       fw_info)
     def compute_activation_bias_correction(self,
                                            graph: Graph,
-                                           quant_config: QuantizationConfig):
+                                           quant_config: QuantizationConfig,
+                                           fw_info: FrameworkInfo):
         """
         Compute activation bias correction on a graph.
         Args:
             graph: Graph to apply activation bias correction on.
             quant_config: QuantizationConfig of how the model should be quantized.
+            fw_info: FrameworkInfo object with information about the specific framework's model.
         Returns:
             Graph after activation bias correction computing.
         """
         return pytorch_compute_activation_bias_correction_of_graph(graph=graph,
                                                                    quant_config=quant_config,
+                                                                   fw_info=fw_info,
                                                                    fw_impl=self)
     def get_substitutions_channel_equalization(self,
-                                               quant_config: QuantizationConfig) -> List[common.BaseSubstitution]:
+                                               quant_config: QuantizationConfig,
+                                               fw_info: FrameworkInfo) -> List[common.BaseSubstitution]:
         """
         Return a list of the framework substitutions used for channel equalization.
         Args:
             quant_config: QuantizationConfig to determine which substitutions to return.
+            fw_info: FrameworkInfo object with information about the specific framework's model.
         Returns:
             A list of the framework substitutions used after we collect statistics.
         """
         substitutions_list = []
         if quant_config.activation_channel_equalization:
-            substitutions_list.extend([ScaleEqualization(quant_config),
-                                       ScaleEqualizationWithPad(quant_config)])
+            substitutions_list.extend([ScaleEqualization(quant_config, fw_info),
+                                       ScaleEqualizationWithPad(quant_config, fw_info)])
         return substitutions_list
-    def get_substitutions_prepare_graph(self) -> List[common.BaseSubstitution]:
+    def get_substitutions_prepare_graph(self, fw_info: FrameworkInfo = None) -> List[common.BaseSubstitution]:
         """
         Returns: A list of the framework substitutions used before we collect the prior information.
@@ -287,7 +299,7 @@ class PytorchImplementation(FrameworkImplementation):
                 ScaledDotProductDecomposition(),
                 MatMulDecomposition(),
                 TransformFunctionCallMethod(),
-                FunctionalConvSubstitution(),
+                FunctionalConvSubstitution(fw_info),
                 FunctionalBatchNorm(),
                 FunctionalLayerNorm(),
                 FunctionalLinear(),
@@ -389,17 +401,20 @@ class PytorchImplementation(FrameworkImplementation):
     def get_node_prior_info(self,
                             node: BaseNode,
+                            fw_info: FrameworkInfo,
                             graph: Graph) -> NodePriorInfo:
         """
         Get a NodePriorInfo object for a node that represents a Pytorch layer.
         Args:
             node: Node to get its prior info.
+            fw_info: Framework specific information needed to create the prior info of the node.
             graph: Graph to check the next node type.
         Returns:
             NodePriorInfo with information about the node.
         """
         return create_node_prior_info(node=node,
+                                      fw_info=fw_info,
                                       graph=graph)
     def count_node_for_mixed_precision_interest_points(self, node: BaseNode) -> bool:
@@ -461,19 +476,23 @@ class PytorchImplementation(FrameworkImplementation):
         return node.layer_class not in [argmax, softmax, Softmax]
     def get_node_mac_operations(self,
-                                node: BaseNode) -> float:
+                                node: BaseNode,
+                                fw_info: FrameworkInfo) -> float:
         """
         Gets the MAC operation count for a given operation.
         Args:
             node: A graph node that wraps the operation for which the MAC count is computed.
+            fw_info: FrameworkInfo object with information about the Pytorch model.
         Returns: The MAC count of the operation
         """
-        if node.kernel_attr is None:
+        kernels = fw_info.get_kernel_op_attributes(node.type)
+        if not kernels or kernels[0] is None:
             return 0
-        kernel_shape = node.get_weights_by_keys(node.kernel_attr).shape
+        assert len(kernels) == 1
+        kernel_shape = node.get_weights_by_keys(kernels[0]).shape
         if node.is_match_type(Conv2d) or node.is_match_type(ConvTranspose2d):
             h, w = node.get_output_shapes_list()[0][-2:]
@@ -481,7 +500,8 @@ class PytorchImplementation(FrameworkImplementation):
         if node.is_match_type(Linear):
             # IN * OUT * (all previous dims[:-1])
-            return node.get_total_output_params() * kernel_shape[node.channel_axis.input]
+            _, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
+            return node.get_total_output_params() * kernel_shape[input_channel_axis]
         return 0

model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py CHANGED Viewed

@@ -23,19 +23,23 @@ from model_compression_toolkit.core.pytorch.constants import MOVING_MEAN, MOVING
 def create_node_prior_info(node: BaseNode,
+                           fw_info: FrameworkInfo,
                            graph: Graph):
     """
     Create a NodePriorInfo object for a given node.
     Args:
         node: Node to create its prior info.
+        fw_info: Information about a specific framework the node was generated from.
         graph: Graph to check the next node type.
     Returns:
         NodePriorInfo object with info about the node.
     """
-    min_output, max_output = node.minmax
+    min_output, max_output = None, None
+    if fw_info.layers_has_min_max(node.type):
+        min_output, max_output = fw_info.layer_min_max_mapping[node.type]
     mean_output, std_output = _get_mean_std_outputs(node=node,
                                                     graph=graph)
     return NodePriorInfo(min_output=min_output,

model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py CHANGED Viewed

@@ -27,7 +27,7 @@ from model_compression_toolkit.target_platform_capabilities.tpc_io_handler impor
 from model_compression_toolkit.verify_packages import FOUND_TORCH
 if FOUND_TORCH:
-    from model_compression_toolkit.core.pytorch.default_framework_info import set_pytorch_info
+    from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
     from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
     from torch.nn import Module
     from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2pytorch import \
@@ -38,7 +38,6 @@ if FOUND_TORCH:
     PYTORCH_DEFAULT_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
-    @set_pytorch_info
     def pytorch_resource_utilization_data(in_model: Module,
                                           representative_data_gen: Callable,
                                           core_config: CoreConfig = CoreConfig(),
@@ -94,6 +93,7 @@ if FOUND_TORCH:
                                                  representative_data_gen,
                                                  core_config,
                                                  target_platform_capabilities,
+                                                 DEFAULT_PYTORCH_INFO,
                                                  fw_impl)
 else:

model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py CHANGED Viewed

@@ -18,7 +18,7 @@ from torch.nn import Conv2d, Linear, ConvTranspose2d
 from model_compression_toolkit.core import QuantizationConfig
 from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.pytorch.quantization.activation_quantization_fn_factory import get_activation_quantization_fn_factory
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
 from model_compression_toolkit.core.common.statistics_correction.compute_activation_bias_correction_of_graph import \
     compute_activation_bias_correction_of_graph
@@ -33,6 +33,7 @@ def activation_bias_correction_node_matchers():
 def pytorch_compute_activation_bias_correction_of_graph(graph: Graph,
                                                         quant_config: QuantizationConfig,
+                                                        fw_info: FrameworkInfo,
                                                         fw_impl: FrameworkImplementation) -> Graph:
     """
     Compute the activation bias correction term for graph based on a PyTorch model.
@@ -40,6 +41,7 @@ def pytorch_compute_activation_bias_correction_of_graph(graph: Graph,
     Args:
         graph: Graph with nodes to compute the activation bias correction.
         quant_config: QuantizationConfig of how the model should be quantized.
+        fw_info: Framework info, such as lists of nodes whose kernels should be quantized.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
     Returns:
@@ -47,9 +49,9 @@ def pytorch_compute_activation_bias_correction_of_graph(graph: Graph,
     """
     graph = compute_activation_bias_correction_of_graph(graph=graph,
                                                         quant_config=quant_config,
+                                                        fw_info=fw_info,
                                                         fw_impl=fw_impl,
                                                         activation_bias_correction_node_matchers=
                                                         activation_bias_correction_node_matchers,
-                                                        kernel_size=KERNEL_SIZE,
-                                                        get_activation_quantization_fn_factory=get_activation_quantization_fn_factory)
+                                                        kernel_size=KERNEL_SIZE)
     return graph

model_compression_toolkit/core/quantization_prep_runner.py CHANGED Viewed

@@ -37,6 +37,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
 def quantization_preparation_runner(graph: Graph,
                                     representative_data_gen: Callable,
                                     core_config: CoreConfig,
+                                    fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation,
                                     tb_w: TensorboardWriter = None,
                                     hessian_info_service: HessianInfoService = None, ) -> Graph:
@@ -52,6 +53,8 @@ def quantization_preparation_runner(graph: Graph,
         graph: A graph representation of the model to be quantized.
         representative_data_gen: Dataset used for calibration.
         core_config: CoreConfig containing parameters of how the model should be quantized
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.).
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w: TensorboardWriter object for logging
         hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.
@@ -65,6 +68,7 @@ def quantization_preparation_runner(graph: Graph,
     ######################################
     mi = ModelCollector(graph,
                         fw_impl,
+                        fw_info,
                         hessian_info_service,
                         core_config.quantization_config)  # Mark points for statistics collection
@@ -81,14 +85,14 @@ def quantization_preparation_runner(graph: Graph,
     # Notice that not all actions affect at this stage (for example, actions that edit the final configuration as
     # there are no final configurations at this stage of the optimization). For this reason we edit the graph
     # again at the end of the optimization process.
-    edit_network_graph(graph, core_config.debug_config.network_editor)
+    edit_network_graph(graph, fw_info, core_config.debug_config.network_editor)
     ######################################
     # Calculate quantization params
     ######################################
-    calculate_quantization_params(graph, core_config.quantization_config, fw_impl=fw_impl,
-                                  repr_data_gen_fn=representative_data_gen, hessian_info_service=hessian_info_service)
+    calculate_quantization_params(graph, fw_impl=fw_impl, repr_data_gen_fn=representative_data_gen,
+                                  hessian_info_service=hessian_info_service)
     if tb_w is not None:
         tb_w.add_graph(graph, 'thresholds_selection')
@@ -105,7 +109,8 @@ def quantization_preparation_runner(graph: Graph,
     ######################################
     if core_config.quantization_config.shift_negative_activation_correction:
         transformed_graph = fw_impl.shift_negative_correction(transformed_graph,
-                                                              core_config)
+                                                              core_config,
+                                                              fw_info)
         if tb_w is not None:
             tb_w.add_graph(transformed_graph, 'after_shift_negative_correction')
             tb_w.add_all_statistics(transformed_graph, 'after_shift_negative_correction')
@@ -117,9 +122,9 @@ def quantization_preparation_runner(graph: Graph,
     ######################################
     # Statistics Correction
     ######################################
-    tg_with_bias = statistics_correction_runner(transformed_graph, core_config, fw_impl, tb_w)
+    tg_with_bias = statistics_correction_runner(transformed_graph, core_config, fw_info, fw_impl, tb_w)
     for n in tg_with_bias.nodes:
         assert n.final_weights_quantization_cfg is None
-    return tg_with_bias
+    return tg_with_bias

model_compression_toolkit/core/runner.py CHANGED Viewed

@@ -16,6 +16,7 @@
 import copy
 from typing import Callable, Any, List, Optional
+from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.fusion.graph_fuser import GraphFuser
 from model_compression_toolkit.core.common.graph.base_graph import Graph
@@ -45,6 +46,7 @@ from model_compression_toolkit.target_platform_capabilities.targetplatform2frame
 def core_runner(in_model: Any,
                 representative_data_gen: Callable,
                 core_config: CoreConfig,
+                fw_info: FrameworkInfo,
                 fw_impl: FrameworkImplementation,
                 fqc: FrameworkQuantizationCapabilities,
                 target_resource_utilization: ResourceUtilization = None,
@@ -63,6 +65,7 @@ def core_runner(in_model: Any,
         in_model: Model to quantize.
         representative_data_gen: Dataset used for calibration.
         core_config: CoreConfig containing parameters of how the model should be quantized
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
         groups of layers by how they should be quantized, etc.).
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
@@ -96,6 +99,7 @@ def core_runner(in_model: Any,
     graph = graph_preparation_runner(in_model,
                                      representative_data_gen,
                                      core_config.quantization_config,
+                                     fw_info,
                                      fw_impl,
                                      fqc,
                                      core_config.bit_width_config,
@@ -108,6 +112,7 @@ def core_runner(in_model: Any,
     tg = quantization_preparation_runner(graph=graph,
                                          representative_data_gen=representative_data_gen,
                                          core_config=core_config,
+                                         fw_info=fw_info,
                                          fw_impl=fw_impl,
                                          tb_w=tb_w,
                                          hessian_info_service=hessian_info_service)
@@ -118,8 +123,9 @@ def core_runner(in_model: Any,
     if core_config.is_mixed_precision_enabled:
         if core_config.mixed_precision_config.configuration_overwrite is None:
-            filter_candidates_for_mixed_precision(graph, target_resource_utilization)
+            filter_candidates_for_mixed_precision(graph, target_resource_utilization, fw_info, fqc)
             bit_widths_config = search_bit_width(tg,
+                                                 fw_info,
                                                  fw_impl,
                                                  target_resource_utilization,
                                                  core_config.mixed_precision_config,
@@ -147,20 +153,22 @@ def core_runner(in_model: Any,
     ######################################
     if core_config.quantization_config.activation_bias_correction:
         tg = fw_impl.compute_activation_bias_correction(graph=tg,
-                                                        quant_config=core_config.quantization_config)
+                                                        quant_config=core_config.quantization_config,
+                                                        fw_info=fw_info)
     # Edit the graph again after finalizing the configurations.
     # This is since some actions regard the final configuration and should be edited.
-    edit_network_graph(tg, core_config.debug_config.network_editor)
+    edit_network_graph(tg, fw_info, core_config.debug_config.network_editor)
     _set_final_resource_utilization(graph=tg,
                                     final_bit_widths_config=bit_widths_config,
                                     target_resource_utilization=target_resource_utilization,
+                                    fw_info=fw_info,
                                     fw_impl=fw_impl)
     if core_config.is_mixed_precision_enabled:
         # Retrieve lists of tuples (node, node's final weights/activation bitwidth)
-        weights_conf_nodes_bitwidth = tg.get_final_weights_config()
+        weights_conf_nodes_bitwidth = tg.get_final_weights_config(fw_info)
         activation_conf_nodes_bitwidth = tg.get_final_activation_config()
         if len(weights_conf_nodes_bitwidth) > 0:
@@ -192,6 +200,7 @@ def core_runner(in_model: Any,
 def _set_final_resource_utilization(graph: Graph,
                                     final_bit_widths_config: List[int],
                                     target_resource_utilization: Optional[ResourceUtilization],
+                                    fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation):
     """
     Computing the resource utilization of the model according to the final bit-width configuration,
@@ -201,13 +210,14 @@ def _set_final_resource_utilization(graph: Graph,
         graph: Graph to compute the resource utilization for.
         final_bit_widths_config: The final bit-width configuration to quantize the model accordingly.
         target_resource_utilization: Requested target resource utilization if relevant.
+        fw_info: A FrameworkInfo object.
         fw_impl: FrameworkImplementation object with specific framework methods implementation.
     """
     ru_targets = target_resource_utilization.get_restricted_targets() if target_resource_utilization else None
     final_ru = None
     if ru_targets:
-        ru_calculator = ResourceUtilizationCalculator(graph, fw_impl)
+        ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
         w_qcs = {n.name: n.final_weights_quantization_cfg for n in graph.nodes}
         a_qcs = {n.name: n.final_activation_quantization_cfg for n in graph.nodes}
         final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantizedNonFused,

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl