PyPI - mct-nightly - Versions diffs - 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250927.534__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250927.534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.4.0.20250925.000543"
+__version__ = "2.4.2.20250927.000534"

model_compression_toolkit/core/analyzer.py CHANGED Viewed

@@ -32,7 +32,8 @@ def analyzer_model_quantization(representative_data_gen: Callable,
                                 tb_w: TensorboardWriter,
                                 float_graph: Graph,
                                 quantized_graph: Graph,
-                                fw_impl: FrameworkImplementation):
+                                fw_impl: FrameworkImplementation,
+                                fw_info: FrameworkInfo):
     """
     Plot the cosine similarity of different points on the graph between the float and quantized
     graphs. Add them to the passed TensorboardWriter object and close all tensorboard writer open
@@ -44,12 +45,14 @@ def analyzer_model_quantization(representative_data_gen: Callable,
         float_graph: Graph of float model.
         quantized_graph: Graph of quantized model.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+        fw_info: Information needed for quantization about the specific framework.
     """
     if tb_w is not None:
         visual = NNVisualizer(float_graph,
                               quantized_graph,
-                              fw_impl=fw_impl)
+                              fw_impl=fw_impl,
+                              fw_info=fw_info)
         if not visual.has_compare_points():
             Logger.error(f'No comparing points were found to plot analyze similarity.')
         else:

model_compression_toolkit/core/common/back2framework/base_model_builder.py CHANGED Viewed

@@ -15,6 +15,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Tuple
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.user_info import UserInformation
@@ -27,17 +28,20 @@ class BaseModelBuilder(ABC):
     def __init__(self,
                  graph: common.Graph,
                  append2output=None,
+                 fw_info: FrameworkInfo = None,
                  return_float_outputs: bool = False):
         """
         Args:
             graph: Graph to build the model from.
             append2output: Nodes of graph to append to model's output.
+            fw_info: Information about the specific framework of the model that is built.
             return_float_outputs: Whether the model returns float tensors or not.
         """
         self.graph = graph
         self.append2output = append2output
+        self.fw_info = fw_info
         self.return_float_outputs = return_float_outputs
     @abstractmethod

model_compression_toolkit/core/common/collectors/base_collector.py CHANGED Viewed

@@ -13,12 +13,11 @@
 # limitations under the License.
 # ==============================================================================
-from abc import ABC, abstractmethod
 import numpy as np
 from model_compression_toolkit.logger import Logger
-class BaseCollector(ABC):
+class BaseCollector(object):
     """
     Base class for statistics collection object.
     """
@@ -27,7 +26,6 @@ class BaseCollector(ABC):
         # When manipulation statistics in a granularity they were not collected by, the data is invalid.
         self.is_legal = True
-    @abstractmethod
     def scale(self, scale_factor: np.ndarray):
         """
         Scale all statistics in collector by some factor.
@@ -39,7 +37,6 @@ class BaseCollector(ABC):
         raise NotImplemented(
             f'{self.__class__.__name__} needs to implement scale operation for its state.')  # pragma: no cover
-    @abstractmethod
     def shift(self, shift_value: np.ndarray):
         """
         Shift all statistics in collector by some value.

model_compression_toolkit/core/common/collectors/mean_collector.py CHANGED Viewed

@@ -87,13 +87,10 @@ class MeanCollector(BaseCollector):
             x: Tensor that goes through the mean collector and needs to be considered in the mean computation.
         """
         self.i += 1  # Update the iteration index
-        if self.axis is None:
-            mu = np.mean(np.reshape(x, [1, -1]), axis=-1)  # mean per channel for a batch
-        else:
-            axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
-            n = x.shape[axis]
-            transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
-            mu = np.mean(np.reshape(np.transpose(x, transpose_index), [n, -1]), axis=-1) # mean per channel for a batch
+        axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
+        n = x.shape[axis]
+        transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
+        mu = np.mean(np.reshape(np.transpose(x, transpose_index), [n, -1]), axis=-1) # mean per channel for a batch
         self.current_sum += mu # sum of all batches
         self.current_mean = self.current_sum / self.i # mean of all batches

model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py CHANGED Viewed

@@ -130,13 +130,10 @@ class MinMaxPerChannelCollector(BaseCollector):
             x: Tensor that goes through the collector and needs to be considered in the min/max computation.
         """
-        if self.axis is None:
-            x_reshape = np.reshape(x, [1, -1])
-        else:
-            axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
-            n = x.shape[axis]
-            transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
-            x_reshape = np.reshape(np.transpose(x, transpose_index), [n, -1])
+        axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
+        n = x.shape[axis]
+        transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
+        x_reshape = np.reshape(np.transpose(x, transpose_index), [n, -1])
         if self.state is None:
             x_max = np.max(x_reshape, axis=-1)
             x_min = np.min(x_reshape, axis=-1)

model_compression_toolkit/core/common/framework_implementation.py CHANGED Viewed

@@ -125,16 +125,18 @@ class FrameworkImplementation(ABC):
                       graph: Graph,
                       mode: ModelBuilderMode,
                       append2output: List[Any],
+                      fw_info: FrameworkInfo,
                       return_float_outputs: bool = False) -> Tuple:
         """
         Build a framework model from a graph.
-        The mode determines how the model should be built. append2output is a list of Nodes
+        The mode determines how the model should be build. append2output is a list of Nodes
         to set as the model outputs.
         Args:
             graph: Graph to build the model from it.
             mode: Mode for how to build the model.
             append2output: List of Nodes to set as the model's outputs.
+            fw_info: FrameworkInfo object with information about the specific framework's model
             return_float_outputs (bool): whether to return outputs before or after quantization nodes (default)
         Returns:
@@ -168,13 +170,15 @@ class FrameworkImplementation(ABC):
     @abstractmethod
     def shift_negative_correction(self,
                                   graph: Graph,
-                                  core_config: CoreConfig) -> Graph:
+                                  core_config: CoreConfig,
+                                  fw_info: FrameworkInfo) -> Graph:
         """
         Apply shift negative correction (SNC) on a graph.
         Args:
             graph: Graph to apply SNC on.
             core_config: Quantization configuration.
+            fw_info: FrameworkInfo object with information about the specific framework's model.
         Returns:
             Graph after SNC.
@@ -185,13 +189,15 @@ class FrameworkImplementation(ABC):
     @abstractmethod
     def compute_activation_bias_correction(self,
                                            graph: Graph,
-                                           quant_config: QuantizationConfig) -> Graph:
+                                           quant_config: QuantizationConfig,
+                                           fw_info: FrameworkInfo) -> Graph:
         """
         Compute activation bias correction on a graph.
         Args:
             graph: Graph to apply activation bias correction on.
             quant_config: QuantizationConfig of how the model should be quantized.
+            fw_info: FrameworkInfo object with information about the specific framework's model.
         Returns:
             Graph after activation bias correction computing.
@@ -201,28 +207,30 @@ class FrameworkImplementation(ABC):
     @abstractmethod
     def get_substitutions_channel_equalization(self,
-                                               quant_config: QuantizationConfig) -> List[common.BaseSubstitution]:
+                                               quant_config: QuantizationConfig,
+                                               fw_info: FrameworkInfo) -> List[common.BaseSubstitution]:
         """
         Return a list of the framework substitutions used for channel equalization.
         Args:
             quant_config: QuantizationConfig to determine which substitutions to return.
+            fw_info: FrameworkInfo object with information about the specific framework's model.
         Returns:
             A list of the framework substitutions used after we collect statistics.
         """
         raise NotImplementedError(f'{self.__class__.__name__} has to implement the '
-                                  f'framework\'s get_substitutions_channel_equalization method.')  # pragma: no cover
+                             f'framework\'s get_substitutions_channel_equalization method.')  # pragma: no cover
     @abstractmethod
-    def get_substitutions_prepare_graph(self) -> List[common.BaseSubstitution]:
+    def get_substitutions_prepare_graph(self, fw_info: FrameworkInfo = None) -> List[common.BaseSubstitution]:
         """
         Returns: A list of the framework substitutions used to prepare the graph.
         """
         raise NotImplementedError(f'{self.__class__.__name__} has to implement the '
-                                  f'framework\'s get_substitutions_prepare_graph method.')  # pragma: no cover
+                             f'framework\'s get_substitutions_prepare_graph method.')  # pragma: no cover
     @abstractmethod
     def get_substitutions_pre_statistics_collection(self, quant_config: QuantizationConfig) -> \
@@ -320,12 +328,14 @@ class FrameworkImplementation(ABC):
                              f'method.')  # pragma: no cover
     def get_node_prior_info(self, node: BaseNode,
+                            fw_info: FrameworkInfo,
                             graph: Graph) -> NodePriorInfo:
         """
         Get a NodePriorInfo object for a node.
         Args:
             node: Node to get its prior info.
+            fw_info: Framework specific information needed to create the prior info of the node.
             graph: Graph to check the next node type.
         Returns:
@@ -333,7 +343,7 @@ class FrameworkImplementation(ABC):
         """
         raise NotImplementedError(f'{self.__class__.__name__} has to implement the '
-                                  f'framework\'s get_node_prior_info method.')  # pragma: no cover
+                             f'framework\'s get_node_prior_info method.')  # pragma: no cover
     def count_node_for_mixed_precision_interest_points(self, node: BaseNode) -> bool:
         """
@@ -384,18 +394,20 @@ class FrameworkImplementation(ABC):
     @abstractmethod
     def get_node_mac_operations(self,
-                                node: BaseNode) -> float:
+                                node: BaseNode,
+                                fw_info: FrameworkInfo) -> float:
         """
         Gets the MAC operation count for a given operation.
         Args:
             node: A graph node that wraps the operation for which the MAC count is computed.
+            fw_info: FrameworkInfo object with information about the specific framework's model.
         Returns: The MAC count of the operation
         """
         raise NotImplementedError(f'{self.__class__.__name__} has to implement the '
-                                  f'framework\'s get_node_mac_operations method.')  # pragma: no cover
+                             f'framework\'s get_node_mac_operations method.')  # pragma: no cover
     @abstractmethod
     def apply_second_moment_correction(self,

model_compression_toolkit/core/common/framework_info.py CHANGED Viewed

@@ -13,9 +13,19 @@
 # limitations under the License.
 # ==============================================================================
+from collections.abc import Callable
 from enum import Enum
-from typing import Dict, Any, Tuple, NamedTuple, Optional
-from abc import ABC, abstractmethod
+from typing import Dict, Any, List
+from mct_quantizers import QuantizationMethod
+from model_compression_toolkit.defaultdict import DefaultDict
+# Default value to use for ops without kernel.
+# This is a weird default, but it's used all over the place, so for now only extract it to const so that it can be
+# referenced by variable instead of hard-coded.
+DEFAULT_KERNEL_ATTRIBUTES = [None]
 class ChannelAxis(Enum):
@@ -32,67 +42,89 @@ class ChannelAxis(Enum):
     NCHW = 1
-class ChannelAxisMapping(NamedTuple):
-    output: int
-    input: int
+class FrameworkInfo:
+    def __init__(self,
+                 activation_quantizer_mapping: Dict[QuantizationMethod, Callable],
+                 kernel_channels_mapping: DefaultDict,
+                 activation_min_max_mapping: Dict[str, tuple],
+                 layer_min_max_mapping: Dict[Any, tuple],
+                 kernel_ops_attributes_mapping: DefaultDict,
+                 out_channel_axis_mapping: DefaultDict):
+        """
+        A class to wrap all information about a specific framework the library needs to quantize a model.
+        Specifically, FrameworkInfo holds lists of layers by how they should be quantized, and multiple mappings such as
+        layer to its kernel channels indices, and a layer to its min/max values, etc.
+        The layers lists are divided into three groups:
+        kernel_ops: Layers that have coefficients and need to get quantized (e.g., Conv2D, Dense, etc.)
+        activation_ops: Layers that their outputs should get quantized (e.g., Add, ReLU, etc.)
+        no_quantization_ops:Layers that should not get quantized (e.g., Reshape, Transpose, etc.)
+        Args:
+            activation_quantizer_mapping (Dict[QuantizationMethod, Callable]): A dictionary mapping from QuantizationMethod to a quantization function.
+            kernel_channels_mapping (DefaultDict): Dictionary from a layer to a tuple of its kernel in/out channels indices.
+            activation_min_max_mapping (Dict[str, tuple]): Dictionary from an activation function to its min/max output values.
+            layer_min_max_mapping (Dict[Any, tuple]): Dictionary from a layer to its min/max output values.
+            kernel_ops_attributes_mapping (DefaultDict): Dictionary from a framework operator to a list of its weights attributes to quantize.
+            out_channel_axis_mapping (DefaultDict): Dictionary of output channels of the model's layers (for computing statistics per-channel).
-class FrameworkInfo(ABC):
-    """
-    A class to wrap all information about a specific framework the library needs to quantize a model.
-    Specifically, FrameworkInfo holds lists of layers by how they should be quantized, and multiple mappings such as
-    layer to it kernel channels indices, and a layer to its min/max values, etc.
-    The layers lists are divided into three groups:
-    kernel_ops: Layers that have coefficients and need to get quantized (e.g., Conv2D, Dense, etc.)
-    activation_ops: Layers that their outputs should get quantized (e.g., Add, ReLU, etc.)
-    no_quantization_ops:Layers that should not get quantized (e.g., Reshape, Transpose, etc.)
-    Fields:
-        kernel_channels_mapping (Dict): Dictionary from a layer to a tuple of its kernel in/out channels indices.
-        kernel_ops_attribute_mapping (Dict): Dictionary from a framework operator to its weight attribute to quantize.
-        out_channel_axis_mapping (Dict): Dictionary of output channels of the model's layers (for computing statistics per-channel).
-        _layer_min_max_mapping (Dict[Any, tuple]): Dictionary from a layer to its min/max output values.
-    """
+        Examples:
+            When quantizing a Keras model, if we want to quantize the kernels of Conv2D layers only, we can
+            set, and we know its kernel out/in channel indices are (3, 2) respectively:
+            >>> import tensorflow as tf
+            >>> kernel_ops = [tf.keras.layers.Conv2D]
+            >>> kernel_channels_mapping = DefaultDict({tf.keras.layers.Conv2D: (3,2)})
-    kernel_ops_attribute_mapping: Dict[Any, str]
-    kernel_channels_mapping: Dict[Any, ChannelAxisMapping]
-    out_channel_axis_mapping: Dict[Any, int]
+            Then, we can create a FrameworkInfo object:
-    _layer_min_max_mapping: Dict[Any, tuple]
-    _default_channel_mapping = ChannelAxisMapping(None, None)
+            >>> FrameworkInfo(kernel_channels_mapping, {}, {})
+            If an activation layer (tf.keras.layers.Activation) should be quantized and we know its min/max output range in advance, we can add it to activation_min_max_mapping to save statistics collection time. For example:
+            >>> activation_min_max_mapping = {'softmax': (0, 1)}
+            >>> FrameworkInfo(kernel_channels_mapping, activation_min_max_mapping, {})
+            If a layer's activations should be quantized and we know its min/max output range in advance, we can add it to layer_min_max_mapping to save statistics collection time. For example:
+            >>> layer_min_max_mapping = {tf.keras.layers.Softmax: (0, 1)}
+            >>> FrameworkInfo(kernel_channels_mapping, activation_min_max_mapping, layer_min_max_mapping)
-    @classmethod
-    def get_kernel_op_attribute(cls, node_type: Any) -> Optional[str]:
         """
-        Get attribute of a layer's weight to quantize.
+        self.activation_quantizer_mapping = activation_quantizer_mapping
+        self.kernel_channels_mapping = kernel_channels_mapping
+        self.activation_min_max_mapping = activation_min_max_mapping
+        self.layer_min_max_mapping = layer_min_max_mapping
+        self.kernel_ops_attributes_mapping = kernel_ops_attributes_mapping
+        self.out_channel_axis_mapping = out_channel_axis_mapping
+    def get_kernel_op_attributes(self, node_type: Any) -> List[str]:
+        """
+        Get a list of attributes of a layer's weights to quantize.
         Args:
-            node_type: Layer to get its attribute.
+            node_type: Layer to get its attributes.
         Returns:
-            Attribute the layer has and should be quantized.
+            A list of attributes the layer has and should be quantized.
         """
-        return cls.kernel_ops_attribute_mapping.get(node_type)
+        attr_list = self.kernel_ops_attributes_mapping.get(node_type)
+        return attr_list
-    @classmethod
-    def get_layer_min_max(cls, layer: Any, fw_attrs: Dict) -> Tuple[float, float]:
+    def is_kernel_op(self, node_type: Any) -> bool:
         """
-        Return layer min/max mapping the FrameworkInfo holds.
+        Check if the node type is a kernel operation.
         Args:
-            layer: A layer to check if has a min/max known values.
-            fw_attrs: framework attributes from framework layer.
+            node_type: Layer type to look up in the kernel ops attributes mapping.
         Returns:
-            Layer's min/max known values.
+            True if node type is a kernel operation, else False.
         """
+        return node_type in self.kernel_ops_attributes_mapping.keys()
-        if cls.layers_has_min_max(layer):
-            return cls._layer_min_max_mapping[layer]
-        else:
-            return None, None
-    @classmethod
-    def layers_has_min_max(cls, layer: Any) -> bool:
+    def layers_has_min_max(self, layer: Any) -> bool:
         """
         Check if a layer is in a layer to min/max mapping the FrameworkInfo holds.
         Args:
@@ -102,59 +134,17 @@ class FrameworkInfo(ABC):
             Whether a layer has a min/max known values or not.
         """
-        return layer in cls._layer_min_max_mapping
+        return layer in self.layer_min_max_mapping
-    @classmethod
-    @abstractmethod
-    def get_kernel_channels(cls, node_type: Any) -> ChannelAxisMapping:
-        """
-        Returns node's channels mapping from kernel_channels_mapping or framework specific default value.
-        Args:
-            node_type: A node type
-        Returns:
-            Node's channels mapping.
+    def activation_has_min_max(self, activation_name: str) -> bool:
         """
-        pass
+        Check if an activation layer has a min/max mapping.
-    @classmethod
-    @abstractmethod
-    def get_out_channel_axis(cls, node_type: Any):
-        """
-        Returns node's output channel mapping from out_channel_axis_mapping or framework specific default value.
         Args:
-            node_type: A node type.
+            activation_name: String of the activation function to check for its min/max values.
         Returns:
-            Node's output channel axis.
+            Whether an activation layer has a min/max known values or not.
         """
-        pass
-# Pointer to current FrameworkInfo class.
-_current_framework_info: type[FrameworkInfo] = None
-def get_fw_info():
-    """
-    A common function to get the current FrameworkInfo class. Raises an error if the pointer wasn't initialized.
-    Returns: FrameworkInfo class.
-    """
-    assert _current_framework_info is not None, "fw_info isn't initialized."
-    return _current_framework_info
-def set_fw_info(fw_info: type[FrameworkInfo]):
-    """
-    A common function to set the current FrameworkInfo class. Raises an error if fw_info doesn't inherit from FrameworkInfo.
-    Args:
-        fw_info: Framework specific object implementing the FrameworkInfo.
-    """
-    global _current_framework_info
-    assert _current_framework_info in [None, _current_framework_info], "FrameworkInfo already initialized."
-    assert issubclass(fw_info, FrameworkInfo), "fw_info must inherit from FrameworkInfo."
-    _current_framework_info = fw_info
+        return activation_name in self.activation_min_max_mapping

model_compression_toolkit/core/common/fusion/graph_fuser.py CHANGED Viewed

@@ -14,12 +14,12 @@
 #  ==============================================================================
 import copy
-from typing import Tuple
+from typing import List, Tuple
 from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfoGenerator
 from model_compression_toolkit.core.common.graph.base_graph import Graph, BaseNode, OutTensor
-from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
-    CandidateNodeQuantizationConfig, NodeQuantizationConfig
+from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import CandidateNodeQuantizationConfig
+from itertools import product
 class FusedLayerType:
@@ -30,7 +30,6 @@ class FusedLayerType:
     def __init__(self):
         self.__name__ = 'FusedLayer'
 class GraphFuser:
     def apply_node_fusion(self, graph: Graph) -> Graph:
         """
@@ -65,6 +64,7 @@ class GraphFuser:
         return graph_copy
     @staticmethod
     def _create_fused_node(fused_node_id: str, nodes: Tuple[BaseNode]) -> BaseNode:
         """
@@ -86,15 +86,10 @@ class GraphFuser:
                               weights={},
                               layer_class=FusedLayerType)
-        base_cfg = CandidateNodeQuantizationConfig(
-            activation_quantization_cfg=nodes[-1].quantization_cfg.base_quantization_cfg.activation_quantization_cfg,
-            weights_quantization_cfg=None
-        )
         activation_cfgs = [c.activation_quantization_cfg for c in nodes[-1].candidates_quantization_cfg]
-        candidates = [CandidateNodeQuantizationConfig(weights_quantization_cfg=None, activation_quantization_cfg=a)
-                      for a in activation_cfgs]
-        fused_node.quantization_cfg = NodeQuantizationConfig(base_quantization_cfg=base_cfg,
-                                                             candidates_quantization_cfg=candidates)
+        fused_node.candidates_quantization_cfg = [
+            CandidateNodeQuantizationConfig(weights_quantization_cfg=None, activation_quantization_cfg=a) for a in
+            activation_cfgs]
         # Keep the final configurations if they were set already.
         fused_node.final_weights_quantization_cfg = nodes[0].final_weights_quantization_cfg
@@ -163,3 +158,5 @@ class GraphFuser:
         # Finally, add the new fused node to the graph
         graph.add_node(fused_node)

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250927.534__py3-none-any.whl

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250927.534__py3-none-any.whl