PyPI - mct-nightly - Versions diffs - 2.4.0.20250616.616__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250616.616__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

model_compression_toolkit/core/common/pruning/memory_calculator.py CHANGED Viewed

@@ -34,18 +34,16 @@ class MemoryCalculator:
     which is crucial for deploying models on memory-constrained devices or optimizing for computational efficiency.
     """
-    def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: PruningFrameworkImplementation):
+    def __init__(self, graph: Graph, fw_impl: PruningFrameworkImplementation):
         """
         Initializes the MemoryCalculator with necessary information about the model's graph,
         framework-specific details, and pruning implementation.
         Args:
             graph (Graph): Computational graph of the model.
-            fw_info (FrameworkInfo): Contains framework-specific information.
             fw_impl (PruningFrameworkImplementation): Implementation details for pruning.
         """
         self.graph = graph
-        self.fw_info = fw_info
         self.fw_impl = fw_impl
     def get_pruned_graph_memory(self,
@@ -204,19 +202,13 @@ class MemoryCalculator:
             if node == section.exit_node:
                 return masks.get(section.entry_node)
-        kernel_attr = self.fw_info.get_kernel_op_attributes(node.type)
-        # Ensure only one kernel attribute exists for the given node.
-        if len(kernel_attr) != 1:
-            Logger.critical(f"Expected a single attribute, but found {len(kernel_attr)} attributes for node '{node}'. Ensure the node configuration is correct.")
-        kernel_attr = kernel_attr[0]
         # Retrieve and validate the axis index for the output channels.
-        _, ic_axis = self.fw_info.kernel_channels_mapping.get(node.type)
+        ic_axis = node.channel_axis.input
         if ic_axis is None or int(ic_axis) != ic_axis:
             Logger.critical(f"Invalid input channel axis type for node '{node}': expected integer but got '{ic_axis}'.")
         # Get the number of output channels based on the kernel attribute and axis.
-        num_ic = node.get_weights_by_keys(kernel_attr).shape[ic_axis]
+        num_ic = node.get_weights_by_keys(node.kernel_attr).shape[ic_axis]
         mask = np.ones(num_ic, dtype=bool)
         return mask
@@ -289,7 +281,7 @@ class MemoryCalculator:
             int: The total number of parameters in the node after pruning.
         """
         total_params = 0
-        attributes_and_oc_axis = self.fw_impl.attrs_oi_channels_info_for_pruning(node, self.fw_info)
+        attributes_and_oc_axis = self.fw_impl.attrs_oi_channels_info_for_pruning(node)
         # Iterate over the node's weights and apply pruning based on the masks.
         for w_attr, w in node.weights.items():
@@ -311,7 +303,7 @@ class MemoryCalculator:
             num_oc = np.sum(output_mask)
         else:
             # Get the node channel axis from framework info
-            channel_axis = self.fw_info.out_channel_axis_mapping.get(node.type)
+            channel_axis = node.out_channel_axis
             if channel_axis is None:
                 Logger.critical(f"The channel axis is undefined. Please ensure the channel axis is explicitly defined for node {node.type} in the framework info.")

model_compression_toolkit/core/common/pruning/prune_graph.py CHANGED Viewed

@@ -27,7 +27,6 @@ from model_compression_toolkit.logger import Logger
 def build_pruned_graph(graph: Graph,
                        masks: Dict[BaseNode, np.ndarray],
-                       fw_info: FrameworkInfo,
                        fw_impl: FrameworkImplementation) -> Graph:
     """
     Prunes the provided graph according to the given pruning output-channels masks.
@@ -35,7 +34,6 @@ def build_pruned_graph(graph: Graph,
     Args:
         graph: The original computational graph to be pruned.
         masks: A dictionary mapping each prunable node to its pruning mask.
-        fw_info: Framework-specific information object.
         fw_impl: Framework-specific implementation object.
     Returns:
@@ -66,8 +64,7 @@ def build_pruned_graph(graph: Graph,
             section_mask = PruningSectionMask(entry_node_oc_mask=mask,
                                               exit_node_ic_mask=mask)
             pruning_section.apply_inner_section_mask(section_mask,
-                                                     fw_impl,
-                                                     fw_info)
+                                                     fw_impl)
     return graph_to_prune

model_compression_toolkit/core/common/pruning/pruner.py CHANGED Viewed

@@ -40,7 +40,6 @@ class Pruner:
     """
     def __init__(self,
                  float_graph: Graph,
-                 fw_info: FrameworkInfo,
                  fw_impl: PruningFrameworkImplementation,
                  target_resource_utilization: ResourceUtilization,
                  representative_data_gen: Callable,
@@ -49,7 +48,6 @@ class Pruner:
         """
         Args:
             float_graph (Graph): The floating-point representation of the model's computation graph.
-            fw_info (FrameworkInfo): Contains metadata and helper functions for the framework.
             fw_impl (PruningFrameworkImplementation): Implementation of specific framework methods required for pruning.
             target_resource_utilization (ResourceUtilization): The target resource utilization to be achieved after pruning.
             representative_data_gen (Callable): Generator function for representative dataset used in pruning analysis.
@@ -57,7 +55,6 @@ class Pruner:
             target_platform_capabilities (FrameworkQuantizationCapabilities): Object encapsulating the capabilities of the target hardware platform.
         """
         self.float_graph = float_graph
-        self.fw_info = fw_info
         self.fw_impl = fw_impl
         self.target_resource_utilization = target_resource_utilization
         self.representative_data_gen = representative_data_gen
@@ -84,7 +81,6 @@ class Pruner:
         # Apply Greedy strategy to compute masks based on importance scores.
         if self.pruning_config.channels_filtering_strategy == ChannelsFilteringStrategy.GREEDY:
             mask_calculator = GreedyMaskCalculator(entry_nodes,
-                                                   self.fw_info,
                                                    self.simd_scores,
                                                    self.target_resource_utilization,
                                                    self.float_graph,
@@ -99,7 +95,6 @@ class Pruner:
         Logger.info("Start pruning graph...")
         _pruned_graph = build_pruned_graph(self.float_graph,
                                            self.per_oc_mask,
-                                           self.fw_info,
                                            self.fw_impl)
         return _pruned_graph
@@ -116,7 +111,7 @@ class Pruner:
         # Retrieve and initialize the importance metric.
         im = get_importance_metric(self.pruning_config.importance_metric, graph=self.float_graph,
                                    representative_data_gen=self.representative_data_gen, fw_impl=self.fw_impl,
-                                   pruning_config=self.pruning_config, fw_info=self.fw_info)
+                                   pruning_config=self.pruning_config)
         entry_node_to_simd_score, simd_groups_indices = im.get_entry_node_to_simd_score(entry_nodes)
         return entry_node_to_simd_score, simd_groups_indices

model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py CHANGED Viewed

@@ -28,15 +28,13 @@ class PruningFrameworkImplementation(FrameworkImplementation):
     @abstractmethod
     def prune_entry_node(self,
                          node: BaseNode,
-                         output_mask: np.ndarray,
-                         fw_info: FrameworkInfo):
+                         output_mask: np.ndarray):
         """
         Abstract method to prune an entry node in the model.
         Args:
             node: The node to be pruned.
             output_mask: A numpy array representing the mask to be applied to the output channels.
-            fw_info: Framework-specific information.
         Raises:
             NotImplemented: If the method is not implemented in the subclass.
@@ -48,8 +46,7 @@ class PruningFrameworkImplementation(FrameworkImplementation):
     def prune_intermediate_node(self,
                                 node: BaseNode,
                                 input_mask: np.ndarray,
-                                output_mask: np.ndarray,
-                                fw_info: FrameworkInfo):
+                                output_mask: np.ndarray):
         """
         Abstract method to prune an intermediate node in the model.
@@ -57,7 +54,6 @@ class PruningFrameworkImplementation(FrameworkImplementation):
             node: The node to be pruned.
             input_mask: Mask to be applied to the input channels.
             output_mask: Mask to be applied to the output channels.
-            fw_info: Framework-specific information.
         Raises:
             NotImplemented: If the method is not implemented in the subclass.
@@ -68,15 +64,13 @@ class PruningFrameworkImplementation(FrameworkImplementation):
     @abstractmethod
     def prune_exit_node(self,
                         node: BaseNode,
-                        input_mask: np.ndarray,
-                        fw_info: FrameworkInfo):
+                        input_mask: np.ndarray):
         """
         Abstract method to prune an exit node in the model.
         Args:
             node: The node to be pruned.
             input_mask: Mask to be applied to the input channels.
-            fw_info: Framework-specific information.
         Raises:
             NotImplemented: If the method is not implemented in the subclass.
@@ -105,8 +99,7 @@ class PruningFrameworkImplementation(FrameworkImplementation):
     @abstractmethod
     def is_node_exit_node(self,
                           node: BaseNode,
-                          corresponding_entry_node: BaseNode,
-                          fw_info: FrameworkInfo) -> bool:
+                          corresponding_entry_node: BaseNode) -> bool:
         raise NotImplemented(f'{self.__class__.__name__} have to implement the '
                              f'framework\'s is_node_exit_node method.')  # pragma: no cover
@@ -129,7 +122,7 @@ class PruningFrameworkImplementation(FrameworkImplementation):
         raise NotImplemented(f'{self.__class__.__name__} have to implement the '
                              f'framework\'s is_node_intermediate_pruning_section method.')  # pragma: no cover
-    def attrs_oi_channels_info_for_pruning(self, node: BaseNode, fw_info: FrameworkInfo) -> Dict[str, Tuple[int, int]]:
+    def attrs_oi_channels_info_for_pruning(self, node: BaseNode) -> Dict[str, Tuple[int, int]]:
         """
         Retrieves the attributes of a given node along with the output/input (OI) channel axis
         for each attribute used to prune these attributes.
@@ -146,7 +139,6 @@ class PruningFrameworkImplementation(FrameworkImplementation):
         Args:
             node (BaseNode): The node from the computational graph.
-            fw_info (FrameworkInfo): Contains framework-specific information and utilities.
         Returns:
             Dict[str, Tuple[int, int]]: A dictionary where each key is an attribute name (like 'kernel' or 'bias')

model_compression_toolkit/core/common/pruning/pruning_section.py CHANGED Viewed

@@ -76,34 +76,28 @@ class PruningSection:
     def apply_inner_section_mask(self,
                                  pruning_section_mask: PruningSectionMask,
-                                 fw_impl: Any,
-                                 fw_info: FrameworkInfo):
+                                 fw_impl: Any):
         """
         Apply the provided pruning section mask to all nodes within the pruning section.
         Args:
             pruning_section_mask (PruningSectionMask): The mask to be applied to the pruning section.
             fw_impl (PruningFrameworkImplementation): Framework-specific implementation for applying the mask.
-            fw_info (FrameworkInfo): Framework-specific information needed to apply the mask.
         """
         fw_impl.prune_entry_node(node=self.entry_node,
-                                 output_mask=pruning_section_mask.entry_node_oc_mask,
-                                 fw_info=fw_info)
+                                 output_mask=pruning_section_mask.entry_node_oc_mask)
         for inter_node in self.intermediate_nodes:
             fw_impl.prune_intermediate_node(node=inter_node,
                                             input_mask=pruning_section_mask.entry_node_oc_mask,
-                                            output_mask=pruning_section_mask.entry_node_oc_mask,
-                                            fw_info=fw_info)
+                                            output_mask=pruning_section_mask.entry_node_oc_mask)
         fw_impl.prune_exit_node(self.exit_node,
-                                input_mask=pruning_section_mask.exit_node_ic_mask,
-                                fw_info=fw_info)
+                                input_mask=pruning_section_mask.exit_node_ic_mask)
     @staticmethod
     def has_matching_channel_count(exit_node: BaseNode,
-                                   corresponding_entry_node: BaseNode,
-                                   fw_info: FrameworkInfo) -> bool:
+                                   corresponding_entry_node: BaseNode) -> bool:
         """
         Checks if the number of input channels of the exit node matches the number of output channels
         of its corresponding entry node.
@@ -115,13 +109,10 @@ class PruningSection:
         Returns:
             bool: True if the channel counts match, False otherwise.
         """
-        _, exit_input_channel_axis = fw_info.kernel_channels_mapping.get(exit_node.type)
-        entry_output_channel_axis, _ = fw_info.kernel_channels_mapping.get(corresponding_entry_node.type)
+        exit_input_channel_axis = exit_node.channel_axis.input
+        entry_output_channel_axis = corresponding_entry_node.channel_axis.output
-        exit_node_attr = fw_info.get_kernel_op_attributes(exit_node.type)[0]
-        entry_node_attr = fw_info.get_kernel_op_attributes(corresponding_entry_node.type)[0]
-        exit_input_channels = exit_node.get_weights_by_keys(exit_node_attr).shape[exit_input_channel_axis]
-        entry_output_channels = corresponding_entry_node.get_weights_by_keys(entry_node_attr).shape[entry_output_channel_axis]
+        exit_input_channels = exit_node.get_weights_by_keys(exit_node.kernel_attr).shape[exit_input_channel_axis]
+        entry_output_channels = corresponding_entry_node.get_weights_by_keys(corresponding_entry_node.kernel_attr).shape[entry_output_channel_axis]
         return exit_input_channels == entry_output_channels

model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py CHANGED Viewed

@@ -15,6 +15,7 @@
 from typing import Callable, List, Tuple
 from model_compression_toolkit.core import QuantizationConfig
+from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.core.common.quantization.node_quantization_config import BaseNodeQuantizationConfig, \
     NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig, \
@@ -40,7 +41,7 @@ class CandidateNodeQuantizationConfig(BaseNodeQuantizationConfig):
                  activation_quantization_fn: Callable = None,
                  activation_quantization_params_fn: Callable = None,
                  weights_quantization_cfg: NodeWeightsQuantizationConfig = None,
-                 weights_channels_axis: Tuple[int, int] = None,
+                 weights_channels_axis: ChannelAxisMapping = None,
                  node_attrs_list: List[str] = None):
         """

model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py CHANGED Viewed

@@ -34,7 +34,7 @@ def filter_nodes_candidates(graph: Graph):
     """
     nodes = list(graph.nodes)
     for n in nodes:
-        n.candidates_quantization_cfg = filter_node_candidates(node=n, fw_info=graph.fw_info)
+        n.candidates_quantization_cfg = filter_node_candidates(node=n)
     return graph
@@ -71,7 +71,7 @@ def _filter_bit_method_dups(candidates: List[CandidateNodeQuantizationConfig],
     return final_candidates
-def filter_node_candidates(node: BaseNode, fw_info) -> List[CandidateNodeQuantizationConfig]:
+def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConfig]:
     """
     Updates a node's candidates configuration list.
     If the node's weights quantization is disabled (or it only has activations to quantize), then the updated list
@@ -81,15 +81,13 @@ def filter_node_candidates(node: BaseNode, fw_info) -> List[CandidateNodeQuantiz
     Args:
         node: Node to set its quantization configurations.
-        fw_info: FrameworkInfo object with information about the specific framework's model.
     """
     filtered_candidates = copy.deepcopy(node.candidates_quantization_cfg)
     final_candidates = copy.deepcopy(node.candidates_quantization_cfg)
-    kernel_attr = fw_info.get_kernel_op_attributes(node.type)[0]
-    if (kernel_attr is None or not node.is_weights_quantization_enabled(kernel_attr)) and not node.is_activation_quantization_enabled():
+    if (node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr)) and not node.is_activation_quantization_enabled():
         # If activation quantization is disabled and the node doesn't have a kernel or doesn't quantize the kernel,
         # but for some reason the node has multiple candidates then replace it with a single dummy candidate with
         # default bit-width values.
@@ -97,8 +95,8 @@ def filter_node_candidates(node: BaseNode, fw_info) -> List[CandidateNodeQuantiz
         single_dummy_candidate.activation_quantization_cfg.activation_n_bits = FLOAT_BITWIDTH
         single_dummy_candidate.activation_quantization_cfg.activation_quantization_method = QuantizationMethod.POWER_OF_TWO
-        if kernel_attr is not None:
-            kernel_config = single_dummy_candidate.weights_quantization_cfg.get_attr_config(kernel_attr)
+        if node.kernel_attr is not None:
+            kernel_config = single_dummy_candidate.weights_quantization_cfg.get_attr_config(node.kernel_attr)
             kernel_config.weights_n_bits = FLOAT_BITWIDTH
             kernel_config.weights_quantization_method = QuantizationMethod.POWER_OF_TWO
@@ -116,9 +114,9 @@ def filter_node_candidates(node: BaseNode, fw_info) -> List[CandidateNodeQuantiz
             c.activation_quantization_cfg.activation_n_bits = FLOAT_BITWIDTH
             c.activation_quantization_cfg.activation_quantization_method = QuantizationMethod.POWER_OF_TWO
-        final_candidates = _filter_bit_method_dups(filtered_candidates, kernel_attr)
+        final_candidates = _filter_bit_method_dups(filtered_candidates, node.kernel_attr)
-    elif kernel_attr is None or not node.is_weights_quantization_enabled(kernel_attr):
+    elif node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr):
         # TODO:
         #  To allow MP on positional weights we need to modify this to consider all weights not only kernel.
         # Remove candidates that have duplicated activation candidates for node with disabled weights quantization.
@@ -129,11 +127,11 @@ def filter_node_candidates(node: BaseNode, fw_info) -> List[CandidateNodeQuantiz
                                and not seen_candidates.add(candidate.activation_quantization_cfg)]
         for c in filtered_candidates:
-            if kernel_attr is not None:
-                kernel_config = c.weights_quantization_cfg.get_attr_config(kernel_attr)
+            if node.kernel_attr is not None:
+                kernel_config = c.weights_quantization_cfg.get_attr_config(node.kernel_attr)
                 kernel_config.weights_n_bits = FLOAT_BITWIDTH
                 kernel_config.weights_quantization_method = QuantizationMethod.POWER_OF_TWO
-        final_candidates = _filter_bit_method_dups(filtered_candidates, kernel_attr)
+        final_candidates = _filter_bit_method_dups(filtered_candidates, node.kernel_attr)
     return final_candidates

model_compression_toolkit/core/common/quantization/node_quantization_config.py CHANGED Viewed

@@ -18,6 +18,7 @@ from typing import Callable, Any, List, Tuple, Union, Dict, TYPE_CHECKING
 from enum import Enum, auto
 import numpy as np
+from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.core.common.quantization.quantization_fn_selection import get_weights_quantization_fn
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.quantization.quantization_params_fn_selection import \
@@ -262,7 +263,7 @@ class WeightsAttrQuantizationConfig:
     def __init__(self,
                  qc: QuantizationConfig,
                  weights_attr_cfg: AttributeQuantizationConfig,
-                 weights_channels_axis: Tuple[int, int] = None):
+                 weights_channels_axis: ChannelAxisMapping = None):
         """
         Args:
@@ -352,7 +353,7 @@ class WeightsAttrQuantizationConfig:
                                                     p=self.l_p_value,
                                                     n_bits=self.weights_n_bits,
                                                     per_channel=self.weights_per_channel_threshold and self.weights_channels_axis is not None,
-                                                    channel_axis=self.weights_channels_axis[0],  # output channel axis
+                                                    channel_axis=self.weights_channels_axis.output,  # output channel axis
                                                     min_threshold=min_threshold)[0]  # Take only first output, the q-params, as axis is already chosen.
             )
         else:
@@ -400,7 +401,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
     """
     def __init__(self, qc: QuantizationConfig,
                  op_cfg: OpQuantizationConfig,
-                 weights_channels_axis: Tuple[int, int],
+                 weights_channels_axis: ChannelAxisMapping,
                  node_attrs_list: List[str]):
         """

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py CHANGED Viewed

@@ -20,6 +20,7 @@ from typing import List, Callable, Generator
 from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
 from model_compression_toolkit.core import QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
     HessianScoresGranularity
@@ -44,11 +45,8 @@ def _collect_nodes_for_hmse(nodes_list: List[BaseNode], graph: Graph) -> List[Ba
     """
     hmse_nodes = []
     for n in nodes_list:
-        kernel_attr_name = graph.fw_info.get_kernel_op_attributes(n.type)
-        kernel_attr_name = None if kernel_attr_name is None or len(kernel_attr_name) == 0 else kernel_attr_name[0]
-        if kernel_attr_name is not None and n.is_weights_quantization_enabled(kernel_attr_name) and \
-            all([c.weights_quantization_cfg.get_attr_config(kernel_attr_name).weights_error_method ==
+        if n.kernel_attr is not None and n.is_weights_quantization_enabled(n.kernel_attr) and \
+            all([c.weights_quantization_cfg.get_attr_config(n.kernel_attr).weights_error_method ==
                  QuantizationErrorMethod.HMSE for c in n.candidates_quantization_cfg]):
             hmse_nodes.append(n)
@@ -114,11 +112,7 @@ def calculate_quantization_params(graph: Graph,
                     if attr_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
                         # Although we collected nodes for HMSE before running the loop, we keep this verification to
                         # notify the user in case of HMSE configured for node that is not compatible for this method
-                        kernel_attr_name = graph.fw_info.get_kernel_op_attributes(n.type)
-                        if len(kernel_attr_name) > 0:
-                            kernel_attr_name = kernel_attr_name[0]
-                        if kernel_attr_name is None or kernel_attr_name not in attr:
+                        if n.kernel_attr is None or n.kernel_attr not in attr:
                             Logger.warning(f"The HMSE error method for parameters selection is only supported for "
                                            f"kernel weights attributes. Running parameters selection for attribute "
                                            f"'{attr}' in node '{n.name}' with the default MSE error method instead.")
@@ -132,7 +126,7 @@ def calculate_quantization_params(graph: Graph,
                                                                                node=n,
                                                                                hessian_info_service=hessian_info_service,
                                                                                num_hessian_samples=num_hessian_samples)
-                    attr_cfg.weights_channels_axis = (output_channels_axis, attr_cfg.weights_channels_axis[1])
+                    attr_cfg.weights_channels_axis = ChannelAxisMapping(output_channels_axis, attr_cfg.weights_channels_axis.input)
                     attr_cfg.set_weights_quantization_param(weights_params)
             if n.is_activation_quantization_enabled():

model_compression_toolkit/core/common/quantization/set_node_quantization_config.py CHANGED Viewed

@@ -20,7 +20,7 @@ from model_compression_toolkit.constants import WEIGHTS, ACTIVATION
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.framework_info import get_fw_info, ChannelAxisMapping
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
@@ -73,7 +73,6 @@ def set_quantization_configuration_to_graph(graph: Graph,
         set_quantization_configs_to_node(node=n,
                                          graph=graph,
                                          quant_config=quant_config,
-                                         fw_info=graph.fw_info,
                                          fqc=graph.fqc,
                                          mixed_precision_enable=mixed_precision_enable,
                                          manual_bit_width_override=manual_bit_width_override)
@@ -154,7 +153,6 @@ def filter_node_qco_by_graph(node: BaseNode,
 def set_quantization_configs_to_node(node: BaseNode,
                                      graph: Graph,
                                      quant_config: QuantizationConfig,
-                                     fw_info: FrameworkInfo,
                                      fqc: FrameworkQuantizationCapabilities,
                                      mixed_precision_enable: bool = False,
                                      manual_bit_width_override: Optional[Dict] = None):
@@ -165,7 +163,6 @@ def set_quantization_configs_to_node(node: BaseNode,
         node (BaseNode): Node to set its quantization configurations.
         graph (Graph): Model's internal representation graph.
         quant_config (QuantizationConfig): Quantization configuration to generate the node's configurations from.
-        fw_info (FrameworkInfo): Information needed for quantization about the specific framework.
         fqc (FrameworkQuantizationCapabilities): FrameworkQuantizationCapabilities to get default OpQuantizationConfig.
         mixed_precision_enable (bool): Whether mixed precision is enabled. Defaults to False.
         manual_bit_width_override (Optional[int]): Specifies a custom bit-width to override the node's activation bit-width. Defaults to None.
@@ -186,10 +183,8 @@ def set_quantization_configs_to_node(node: BaseNode,
         mixed_precision_enable=mixed_precision_enable)
     # Create QC candidates for weights and activation combined
-    weight_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
     node.candidates_quantization_cfg = _create_node_candidates_qc(quant_config,
-                                                                  fw_info,
-                                                                  weight_channel_axis,
+                                                                  node.channel_axis,
                                                                   node_qc_options_list,
                                                                   base_config,
                                                                   node,
@@ -198,7 +193,7 @@ def set_quantization_configs_to_node(node: BaseNode,
     # sorting the candidates by kernel attribute weights number of bits first and then by activation number of bits
     # (in reversed order). since only kernel attribute is quantized in weights mixed precision,
     # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
-    node.sort_node_candidates(fw_info)
+    node.sort_node_candidates()
     for candidate_qc in node.candidates_quantization_cfg:
         if candidate_qc.activation_quantization_cfg.quant_mode == ActivationQuantizationMode.QUANT and \
@@ -217,14 +212,12 @@ def set_quantization_configs_to_node(node: BaseNode,
 def create_node_activation_qc(qc: QuantizationConfig,
-                              fw_info: FrameworkInfo,
                               op_cfg: OpQuantizationConfig) -> NodeActivationQuantizationConfig:
     """
     Create an activation quantization configuration from a QuantizationConfig object.
     Args:
         qc: QuantizationConfig to create the node's config from.
-        fw_info: Information about the specific framework the node was created from (e.g., whether or not its
         weights/activations should be quantized)
         op_cfg: OpQuantizationConfig with quantizers types to set in node quantization configuration.
@@ -232,7 +225,7 @@ def create_node_activation_qc(qc: QuantizationConfig,
         Activation quantization configuration of a node.
     """
-    activation_quantization_fn = fw_info.activation_quantizer_mapping.get(op_cfg.activation_quantization_method)
+    activation_quantization_fn = get_fw_info().activation_quantizer_mapping.get(op_cfg.activation_quantization_method)
     if activation_quantization_fn is None:
         Logger.critical('Unknown activation quantization method specified.')  # pragma: no cover
@@ -245,8 +238,7 @@ def create_node_activation_qc(qc: QuantizationConfig,
 def _create_node_single_candidate_qc(qc: QuantizationConfig,
-                                     fw_info: FrameworkInfo,
-                                     weight_channel_axis: Tuple[int, int],
+                                     weight_channel_axis: ChannelAxisMapping,
                                      op_cfg: OpQuantizationConfig,
                                      node_attrs_list: List[str]) -> CandidateNodeQuantizationConfig:
     """
@@ -256,8 +248,6 @@ def _create_node_single_candidate_qc(qc: QuantizationConfig,
     Args:
         qc: QuantizationConfig to create the node's config from.
-        fw_info: Information about the specific framework the node was created from (e.g., whether its
-            weights/activations should be quantized)
         weight_channel_axis: (Output, Input) channel index of the node's kernel.
         op_cfg: OpQuantizationConfig of the node with quantizers types to use when creating node quantization configuration.
         node_attrs_list: A list of the node's weights attributes names.
@@ -269,7 +259,7 @@ def _create_node_single_candidate_qc(qc: QuantizationConfig,
     # parameters for weights attributes quantization are set within  CandidateNodeQuantizationConfig initialization
     # get parameters for activation quantization
-    activation_quantization_fn = fw_info.activation_quantizer_mapping.get(op_cfg.activation_quantization_method)
+    activation_quantization_fn = get_fw_info().activation_quantizer_mapping.get(op_cfg.activation_quantization_method)
     if activation_quantization_fn is None:
         Logger.critical('Unknown activation quantization method specified.')  # pragma: no cover
@@ -293,8 +283,7 @@ def _create_node_single_candidate_qc(qc: QuantizationConfig,
 def _create_node_candidates_qc(qc: QuantizationConfig,
-                               fw_info: FrameworkInfo,
-                               weight_channel_axis: Tuple[int, int],
+                               weight_channel_axis: ChannelAxisMapping,
                                node_qc_options_list: List[OpQuantizationConfig],
                                base_config: OpQuantizationConfig,
                                node: BaseNode,
@@ -304,8 +293,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
     Args:
         qc (QuantizationConfig): Quantization configuration the quantization process should follow.
-        fw_info (FrameworkInfo): Framework information (e.g., which layers should have their kernels quantized).
-        weight_channel_axis (Tuple[int, int]): (Output, Input) channel index of the node's kernel.
+        weight_channel_axis (ChannelAxisMapping): (Output, Input) channel index of the node's kernel.
         node_qc_options_list (List[OpQuantizationConfig]): List of quantization configs of node.
         base_config (OpQuantizationConfig): Base quantization config for node.
         node (BaseNode): A node to set quantization configuration candidates to.
@@ -322,14 +310,12 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
         for op_cfg in node_qc_options_list:
             candidate_qc = copy.deepcopy(qc)
             candidates.append(_create_node_single_candidate_qc(candidate_qc,
-                                                               fw_info,
                                                                weight_channel_axis,
                                                                op_cfg,
                                                                node_attrs_list))
     else:
         candidates.append(_create_node_single_candidate_qc(qc,
-                                                           fw_info,
                                                            weight_channel_axis,
                                                            base_config,
                                                            node_attrs_list))

model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py CHANGED Viewed

@@ -38,8 +38,7 @@ def apply_activation_bias_correction_to_graph(graph: Graph,
     for n in graph.nodes:
         # Activation bias correction is only relevant for nodes with kernel op
-        kernel_attr = graph.fw_info.get_kernel_op_attributes(n.type)[0]
-        if core_config.quantization_config.activation_bias_correction and kernel_attr is not None and \
+        if core_config.quantization_config.activation_bias_correction and n.kernel_attr is not None and \
                 n.final_activation_quantization_cfg.activation_bias_correction_term is not None:
             # If activation bias correction is enabled in n.quantization_cfg, an activation bias correction term was
             # calculated during model preparation, and is used now in the node's bias term.

model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py CHANGED Viewed

@@ -41,9 +41,8 @@ def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
     graph = copy.deepcopy(graph_to_apply_bias_correction)
     for n in graph.nodes:
         # bias correction is only relevant for nodes with kernel op
-        kernel_attr = graph.fw_info.get_kernel_op_attributes(n.type)[0]
-        if core_config.quantization_config.weights_bias_correction and kernel_attr is not None and \
-            n.is_weights_quantization_enabled(kernel_attr) and \
+        if core_config.quantization_config.weights_bias_correction and n.kernel_attr is not None and \
+            n.is_weights_quantization_enabled(n.kernel_attr) and \
                 not n.final_weights_quantization_cfg.weights_second_moment_correction:
             # If a kernel was quantized and weights bias correction is enabled in n.quantization_cfg,
             # a bias correction term was calculated during model preparation, and is used now in the node's bias term.

mct-nightly 2.4.0.20250616.616__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl

mct-nightly 2.4.0.20250616.616__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl