mct-nightly 2.3.0.20250511.614__py3-none-any.whl → 2.3.0.20250513.611__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.3.0.20250511.614.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/METADATA +1 -1
- {mct_nightly-2.3.0.20250511.614.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/RECORD +23 -23
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/framework_implementation.py +6 -33
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +22 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +8 -5
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +69 -58
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +82 -79
- model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py +32 -26
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +7 -0
- model_compression_toolkit/core/common/similarity_analyzer.py +1 -1
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +37 -73
- model_compression_toolkit/core/keras/keras_implementation.py +8 -45
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +7 -5
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +6 -5
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +46 -78
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +7 -9
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +12 -10
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +6 -41
- {mct_nightly-2.3.0.20250511.614.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/WHEEL +0 -0
- {mct_nightly-2.3.0.20250511.614.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.3.0.20250511.614.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py

@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import contextlib
 import copy
+import itertools
 
 import numpy as np
-from typing import Callable, Any, List, Tuple
+from typing import Callable, Any, List, Tuple, Dict, Optional
 
-from model_compression_toolkit.constants import AXIS
 from model_compression_toolkit.core import FrameworkInfo, MixedPrecisionQuantizationConfig
 from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import \
+    set_activation_quant_layer_to_bitwidth, set_weights_quant_layer_to_bitwidth
 from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
-from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.common.similarity_analyzer import compute_kl_divergence
 from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
 from model_compression_toolkit.logger import Logger
@@ -41,7 +43,6 @@ class SensitivityEvaluation:
                  representative_data_gen: Callable,
                  fw_info: FrameworkInfo,
                  fw_impl: Any,
-                 set_layer_to_bitwidth: Callable,
                  disable_activation_for_metric: bool = False,
                  hessian_info_service: HessianInfoService = None
                  ):
@@ -63,8 +64,6 @@ class SensitivityEvaluation:
             quant_config: MP Quantization configuration for how the graph should be quantized.
             representative_data_gen: Dataset used for getting batches for inference.
             fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-            set_layer_to_bitwidth: A fw-dependent function that allows to configure a configurable MP model
-                with a specific bit-width configuration.
             disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
             hessian_info_service: HessianInfoService to fetch Hessian approximation information.
 
@@ -74,10 +73,9 @@ class SensitivityEvaluation:
         self.representative_data_gen = representative_data_gen
         self.fw_info = fw_info
         self.fw_impl = fw_impl
-        self.set_layer_to_bitwidth = set_layer_to_bitwidth
         self.disable_activation_for_metric = disable_activation_for_metric
         if self.quant_config.use_hessian_based_scores:
-            if not isinstance(hessian_info_service, HessianInfoService):
+            if not isinstance(hessian_info_service, HessianInfoService):  # pragma: no cover
                 Logger.critical(
                     f"When using Hessian-based approximations for sensitivity evaluation, a valid HessianInfoService object is required; found {type(hessian_info_service)}.")
         self.hessian_info_service = hessian_info_service
@@ -159,44 +157,44 @@ class SensitivityEvaluation:
             axis_list.append(axis if distance_fn == compute_kl_divergence else None)
         return distance_fns_list, axis_list
 
-    def compute_metric(self,
-                       mp_model_configuration: List[int],
-                       node_idx: List[int] = None,
-                       baseline_mp_configuration: List[int] = None) -> float:
+    def compute_metric(self, mp_a_cfg: Dict[str, Optional[int]], mp_w_cfg: Dict[str, Optional[int]]) -> float:
         """
         Compute the sensitivity metric of the MP model for a given configuration (the sensitivity
         is computed based on the similarity of the interest points' outputs between the MP model
         and the float model or a custom metric if given).
+        Quantization for any configurable activation / weight that were not passed is disabled.
 
         Args:
-
-
-            baseline_mp_configuration: A mixed-precision configuration to set the model back to after modifying it to
-                compute the metric for the given configuration.
+            mp_a_cfg: Bitwidth activations configuration for the MP model.
+            mp_w_cfg: Bitwidth weights configuration for the MP model.
 
         Returns:
             The sensitivity metric of the MP model for a given configuration.
         """
 
-
-
-                                        node_idx)
+        with self._configured_mp_model(mp_a_cfg, mp_w_cfg):
+            sensitivity_metric = self._compute_metric()
 
-
-        if self.quant_config.custom_metric_fn is None:
-            ipts_distances, out_pts_distances = self._compute_distance()
-            sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
-                                                                   self.quant_config.distance_weighting_method)
-        else:
-            sensitivity_metric = self.quant_config.custom_metric_fn(self.model_mp)
-            if not isinstance(sensitivity_metric, (float, np.floating)):
-                raise TypeError(f'The custom_metric_fn is expected to return float or numpy float, got {type(sensitivity_metric).__name__}')
+        return sensitivity_metric
 
-
-
-
-                                        node_idx)
+    def _compute_metric(self) -> float:
+        """
+        Compute sensitivity metric on a configured mp model.
 
+        Returns:
+            Sensitivity metric.
+        """
+        if self.quant_config.custom_metric_fn:
+            sensitivity_metric = self.quant_config.custom_metric_fn(self.model_mp)
+            if not isinstance(sensitivity_metric, (float, np.floating)):
+                raise TypeError(
+                    f'The custom_metric_fn is expected to return float or numpy float, got {type(sensitivity_metric).__name__}')
+            return sensitivity_metric
+
+        # compute default metric
+        ipts_distances, out_pts_distances = self._compute_distance()
+        sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
+                                                               self.quant_config.distance_weighting_method)
         return sensitivity_metric
 
     def _init_baseline_tensors_list(self):
@@ -217,17 +215,31 @@ class SensitivityEvaluation:
 
         evaluation_graph = copy.deepcopy(self.graph)
 
-        if
-
+        # Disable quantization for non-configurable nodes, and, if requested, for all activations (quantizers won't
+        # be added to the model).
+        for n in evaluation_graph.get_topo_sorted_nodes():
+            if self.disable_activation_for_metric or not n.has_configurable_activation():
                 for c in n.candidates_quantization_cfg:
                     c.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.NO_QUANT
+            if not n.has_any_configurable_weight():
+                for c in n.candidates_quantization_cfg:
+                    c.weights_quantization_cfg.disable_all_weights_quantization()
 
         model_mp, _, conf_node2layers = self.fw_impl.model_builder(evaluation_graph,
                                                                    mode=ModelBuilderMode.MIXEDPRECISION,
                                                                    append2output=self.interest_points + self.output_points,
                                                                    fw_info=self.fw_info)
 
-        #
+        # Disable all configurable quantizers. They will be activated one at a time during sensitivity evaluation.
+        # Note: from this point mp_model is not in sync with graph quantization configuration for configurable nodes.
+        for layer in itertools.chain(*conf_node2layers.values()):
+            if isinstance(layer, self.fw_impl.activation_quant_layer_cls):
+                set_activation_quant_layer_to_bitwidth(layer, None, self.fw_impl)
+            else:
+                assert isinstance(layer, self.fw_impl.weights_quant_layer_cls)
+                set_weights_quant_layer_to_bitwidth(layer, None, self.fw_impl)
+
+        # Build a baseline model (to compute distances from).
         baseline_model, _ = self.fw_impl.model_builder(evaluation_graph,
                                                        mode=ModelBuilderMode.FLOAT,
                                                        append2output=self.interest_points + self.output_points)
@@ -259,55 +271,46 @@ class SensitivityEvaluation:
         # Return the mean approximation value across all images for each interest point
         return np.mean(approx_by_image, axis=0)
 
-
-
-                                    node_idx: List[int]):
-        """
-        Configure a dynamic model (namely, model with layers that their weights and activation
-        bit-width can be configured) using an MP model configuration mp_model_configuration.
-
-        Args:
-            mp_model_configuration: Configuration of bit-width indices to set to the model.
-            node_idx: List of nodes' indices to configure (the rest layers are configured as the baseline model).
+    @contextlib.contextmanager
+    def _configured_mp_model(self, mp_a_cfg: Dict[str, Optional[int]], mp_w_cfg: Dict[str, Optional[int]]):
         """
+        Context manager to configure specific configurable layers of the mp model. At exit, configuration is
+        automatically restored to un-quantized.
 
-        # Configure model
-        # Note: Not all nodes in the graph are included in the MP model that is returned by the model builder.
-        # Thus, the last configurable layer must be included in the interest points for evaluating the metric,
-        # otherwise, not all configurable nodes will be considered throughout the MP optimization search (since
-        # they will not affect the metric value).
-        if node_idx is not None:  # configure specific layers in the mp model
-            for node_idx_to_configure in node_idx:
-                self._configure_node_bitwidth(self.sorted_configurable_nodes_names,
-                                              mp_model_configuration, node_idx_to_configure)
-        else:  # use the entire mp_model_configuration to configure the model
-            for node_idx_to_configure, bitwidth_idx in enumerate(mp_model_configuration):
-                self._configure_node_bitwidth(self.sorted_configurable_nodes_names,
-                                              mp_model_configuration, node_idx_to_configure)
-
-    def _configure_node_bitwidth(self,
-                                 sorted_configurable_nodes_names: List[str],
-                                 mp_model_configuration: List[int],
-                                 node_idx_to_configure: int):
-        """
-        Configures a node with multiple quantization candidates to the bitwidth candidate in the given index.
         Args:
-
-
-            mp_model_configuration: Configuration of bit-width indices to set to the model.
-            node_idx_to_configure: Quantization configuration candidate to configure.
-
-        Returns:
+            mp_a_cfg: Nodes bitwidth indices to configure activation quantizers to.
+            mp_w_cfg: Nodes bitwidth indices to configure weights quantizers to.
 
         """
-
-
-
-
-
-
-
-
+        if not (mp_a_cfg and any(v is not None for v in mp_a_cfg.values()) or
+                mp_w_cfg and any(v is not None for v in mp_w_cfg.values())):
+            raise ValueError(f'Requested configuration is either empty or contain only None values.')
+
+        # defined here so that it can't be used directly
+        def apply_bitwidth_config(a_cfg, w_cfg):
+            node_names = set(a_cfg.keys()).union(set(w_cfg.keys()))
+            for n in node_names:
+                node_quant_layers = self.conf_node2layers.get(n)
+                if node_quant_layers is None:  # pragma: no cover
+                    raise ValueError(f"Matching layers for node {n} not found in the mixed precision model configuration.")
+                for qlayer in node_quant_layers:
+                    assert isinstance(qlayer, (self.fw_impl.activation_quant_layer_cls,
+                                               self.fw_impl.weights_quant_layer_cls)), f'Unexpected {type(qlayer)} of node {n}'
+                    if isinstance(qlayer, self.fw_impl.activation_quant_layer_cls) and n in a_cfg:
+                        set_activation_quant_layer_to_bitwidth(qlayer, a_cfg[n], self.fw_impl)
+                        a_cfg.pop(n)
+                    elif isinstance(qlayer, self.fw_impl.weights_quant_layer_cls) and n in w_cfg:
+                        set_weights_quant_layer_to_bitwidth(qlayer, w_cfg[n], self.fw_impl)
+                        w_cfg.pop(n)
+            if a_cfg or w_cfg:
+                raise ValueError(f'Not all mp configs were consumed, remaining activation config {a_cfg}, '
+                                 f'weights config {w_cfg}.')
+
+        apply_bitwidth_config(mp_a_cfg.copy(), mp_w_cfg.copy())
+        try:
+            yield
+        finally:
+            apply_bitwidth_config({n: None for n in mp_a_cfg}, {n: None for n in mp_w_cfg})
 
     def _compute_points_distance(self,
                                  baseline_tensors: List[Any],
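
The reworked compute_metric takes two per-node dictionaries (mp_a_cfg for activations, mp_w_cfg for weights, each mapping a node name to a candidate index or None) and wraps the evaluation in _configured_mp_model, which undoes the configuration when the block exits. A minimal standalone sketch of that apply/restore pattern, using a hypothetical FakeQuantLayer rather than MCT's real quantization wrappers:

import contextlib
from typing import Dict, Optional

class FakeQuantLayer:
    """Stand-in for a configurable quantization layer; None means quantization is disabled."""
    def __init__(self):
        self.active_idx: Optional[int] = None

@contextlib.contextmanager
def configured_model(layers: Dict[str, FakeQuantLayer], cfg: Dict[str, Optional[int]]):
    # Apply the requested candidate index per node name.
    for name, idx in cfg.items():
        layers[name].active_idx = idx
    try:
        yield
    finally:
        # Restore every touched layer to the un-quantized state, as _configured_mp_model does.
        for name in cfg:
            layers[name].active_idx = None

layers = {'conv1': FakeQuantLayer(), 'conv2': FakeQuantLayer()}
with configured_model(layers, {'conv1': 0}):   # quantize only conv1, candidate index 0
    assert layers['conv1'].active_idx == 0
assert all(q.active_idx is None for q in layers.values())   # everything restored on exit

This is also why the builder above disables every configurable quantizer right after constructing the MP model: the baseline state is "nothing quantized", and each metric evaluation only switches on the nodes named in the two dictionaries.
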
model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py

@@ -12,39 +12,45 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
+import typing
+from typing import Any, Optional
 
+if typing.TYPE_CHECKING:  # pragma: no cover
+    from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 
-
-
-
-
-                            weights_quant_layer_type: type,
-                            activation_quant_layer_type: type):
+
+def set_activation_quant_layer_to_bitwidth(quantization_layer: Any,
+                                           bitwidth_idx: Optional[int],
+                                           fw_impl: 'FrameworkImplementation'):
     """
-    Configures a layer's configurable quantizer to work with a different bit-width.
+    Configures a layer's configurable activation quantizer to work with a different bit-width.
     The bit-width_idx is the index of the actual quantizer the quantizer object in the quantization_layer wraps/holds.
 
     Args:
         quantization_layer: Layer to change its bit-width.
-        bitwidth_idx: Index of the bit-width the layer should work with.
-
-        activation_quantizer_type: A class of activation quantizer with configurable bitwidth options.
-        weights_quant_layer_type: A class of a weights layer wrapper.
-        activation_quant_layer_type: A class of an activation quantization holder.
+        bitwidth_idx: Index of the bit-width the layer should work with, or None to disable quantization.
+        fw_impl: framework implementation object.
     """
+    assert isinstance(quantization_layer, fw_impl.activation_quant_layer_cls)
+    assert isinstance(quantization_layer.activation_holder_quantizer, fw_impl.configurable_activation_quantizer_cls)
+    quantization_layer.activation_holder_quantizer.set_active_activation_quantizer(bitwidth_idx)
+
 
-
-
-
-
-
-
-            quantizer.set_weights_bit_width_index(bitwidth_idx)
+def set_weights_quant_layer_to_bitwidth(quantization_layer: Any,
+                                        bitwidth_idx: Optional[int],
+                                        fw_impl: 'FrameworkImplementation'):
+    """
+    Configures a layer's configurable weights quantizer to work with a different bit-width.
+    The bit-width_idx is the index of the actual quantizer the quantizer object in the quantization_layer wraps/holds.
 
-
-
-
-
-
-
+    Args:
+        quantization_layer: Layer to change its bit-width.
+        bitwidth_idx: Index of the bit-width the layer should work with, or None to disable quantization.
+        fw_impl: framework implementation object.
+    """
+    assert isinstance(quantization_layer, fw_impl.weights_quant_layer_cls)
+    configurable_quantizers = [q for q in quantization_layer.weights_quantizers.values()
+                               if isinstance(q, fw_impl.configurable_weights_quantizer_cls)]
+    assert configurable_quantizers
+    for quantizer in configurable_quantizers:
+        quantizer.set_weights_bit_width_index(bitwidth_idx)
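
Both helpers now look up the relevant classes through attributes of the framework implementation (activation_quant_layer_cls, configurable_activation_quantizer_cls, and their weights counterparts) instead of receiving them as arguments. A rough duck-typed sketch of that dispatch, with placeholder classes standing in for the real Keras/PyTorch holders and quantizers:

from typing import Any, Optional

class DummyConfigurableActivationQuantizer:
    def __init__(self):
        self.active_idx: Optional[int] = None

    def set_active_activation_quantizer(self, idx: Optional[int]):
        self.active_idx = idx

class DummyActivationHolder:
    def __init__(self):
        self.activation_holder_quantizer = DummyConfigurableActivationQuantizer()

class DummyFwImpl:
    # Mirrors the class attributes the framework implementations now expose.
    activation_quant_layer_cls = DummyActivationHolder
    configurable_activation_quantizer_cls = DummyConfigurableActivationQuantizer

def set_activation_bitwidth(layer: Any, bitwidth_idx: Optional[int], fw_impl) -> None:
    # Same shape as set_activation_quant_layer_to_bitwidth above, but against the dummies.
    assert isinstance(layer, fw_impl.activation_quant_layer_cls)
    assert isinstance(layer.activation_holder_quantizer, fw_impl.configurable_activation_quantizer_cls)
    layer.activation_holder_quantizer.set_active_activation_quantizer(bitwidth_idx)

holder = DummyActivationHolder()
set_activation_bitwidth(holder, 2, DummyFwImpl)     # select candidate index 2
set_activation_bitwidth(holder, None, DummyFwImpl)  # None disables quantization entirely
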
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py

@@ -104,10 +104,11 @@ def greedy_solution_refinement_procedure(mp_solution: Dict[BaseNode, int],
             new_solution[node_idx_to_upgrade] = nodes_next_candidate[node_idx_to_upgrade]
             changed = True
 
-
-
-
-
+    changed_solutions = {n: (sol, new_solution[n]) for n, sol in mp_solution.items() if sol != new_solution[n]}
+    if changed_solutions:
+        msg = '\n'.join(f'{n.name}: {mp_solution[n]} -> {new_solution[n]}' for n in changed_solutions)
+        Logger.info(f'Greedy MP algorithm changed configuration for {len(changed_solutions)} out of {len(mp_solution)} '
+                    f'layers (numbers represent indices of the chosen bit-width candidate for each layer):\n{msg}')
     return new_solution
 
 
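
The added block logs only when the greedy refinement actually changed something, reporting per-layer candidate indices. A standalone illustration of the same comprehension over plain node names (hypothetical values, not tied to MCT's BaseNode objects):

import logging

logging.basicConfig(level=logging.INFO)

# Chosen candidate index per layer, before and after refinement.
mp_solution = {'conv1': 2, 'conv2': 1, 'fc': 0}
new_solution = {'conv1': 1, 'conv2': 1, 'fc': 0}

changed = {n: (old, new_solution[n]) for n, old in mp_solution.items() if old != new_solution[n]}
if changed:
    msg = '\n'.join(f'{n}: {mp_solution[n]} -> {new_solution[n]}' for n in changed)
    logging.info(f'Greedy MP algorithm changed configuration for {len(changed)} out of '
                 f'{len(mp_solution)} layers:\n{msg}')
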
model_compression_toolkit/core/common/quantization/node_quantization_config.py

@@ -549,6 +549,13 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         """
         return {attr: self.get_attr_config(attr) for attr in self.all_weight_attrs}
 
+    def disable_all_weights_quantization(self):
+        """ Disable quantization for all weights. """
+        for w_cfg in self.pos_attributes_config_mapping.values():
+            w_cfg.enable_weights_quantization = False
+        for w_cfg in self.attributes_config_mapping.values():
+            w_cfg.enable_weights_quantization = False
+
     def _extract_config_for_attributes_with_name(self, attr_name) -> Dict[str, WeightsAttrQuantizationConfig]:
         """
         Extract the saved attributes that contain the given attribute name.
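
The new disable_all_weights_quantization clears the enable flag across both the named and the positional attribute maps. A dataclass-based sketch of the same idea (the field names follow the diff; the real config classes carry much more state):

from dataclasses import dataclass, field
from typing import Dict

@dataclass
class AttrQuantConfig:                 # stand-in for WeightsAttrQuantizationConfig
    enable_weights_quantization: bool = True

@dataclass
class WeightsQuantConfig:              # stand-in for NodeWeightsQuantizationConfig
    attributes_config_mapping: Dict[str, AttrQuantConfig] = field(default_factory=dict)
    pos_attributes_config_mapping: Dict[int, AttrQuantConfig] = field(default_factory=dict)

    def disable_all_weights_quantization(self) -> None:
        # Walk both positional and named weight attributes, as the new method does.
        for cfg in self.pos_attributes_config_mapping.values():
            cfg.enable_weights_quantization = False
        for cfg in self.attributes_config_mapping.values():
            cfg.enable_weights_quantization = False

wq = WeightsQuantConfig(attributes_config_mapping={'kernel': AttrQuantConfig()},
                        pos_attributes_config_mapping={1: AttrQuantConfig()})
wq.disable_all_weights_quantization()
assert not any(c.enable_weights_quantization
               for c in [*wq.attributes_config_mapping.values(),
                         *wq.pos_attributes_config_mapping.values()])
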
model_compression_toolkit/core/common/similarity_analyzer.py

@@ -194,7 +194,7 @@ def compute_cs(float_tensor: np.ndarray,
     cs = np.sum(float_flat * fxp_flat, axis=axis) / ((float_norm * fxp_norm) + eps)
 
     # Return a non-negative float (smaller value -> more similarity)
-    return (1.0 - cs) / 2.0
+    return np.maximum((1.0 - cs) / 2.0, 0)
 
 
 def compute_lp_norm(float_tensor: np.ndarray,
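
The one-line change in compute_cs clamps the cosine-similarity distance at zero: floating-point rounding can push the normalized dot product marginally past 1 for near-identical tensors, which previously produced a tiny negative "distance". A quick numpy check of the clamped form (a simplified stand-in; the real compute_cs also validates its inputs and supports an axis argument):

import numpy as np

def cs_distance(a: np.ndarray, b: np.ndarray, eps: float = 1e-8) -> float:
    a, b = a.flatten(), b.flatten()
    cs = np.sum(a * b) / (np.linalg.norm(a) * np.linalg.norm(b) + eps)
    # Rounding can nudge cs slightly above 1, so clamp the distance at 0 as the patch does.
    return float(np.maximum((1.0 - cs) / 2.0, 0))

x = np.random.default_rng(0).normal(size=1000).astype(np.float32)
print(cs_distance(x, x))    # ~0.0, guaranteed non-negative
print(cs_distance(x, -x))   # ~1.0 for anti-correlated tensors
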
model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py

@@ -14,17 +14,16 @@
 # ==============================================================================
 from typing import Tuple, Any, Dict, Union, List
 
-from packaging import version
 import tensorflow as tf
+from packaging import version
+
 if version.parse(tf.__version__) >= version.parse("2.13"):
     from keras.src.engine.base_layer import Layer
 else:
     from keras.engine.base_layer import Layer  # pragma: no cover
 
 from keras.models import Model
-from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder
-from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
-from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer
+from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder
 
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.user_info import UserInformation
@@ -34,9 +33,6 @@ from model_compression_toolkit.core.keras.mixed_precision.configurable_activatio
 from model_compression_toolkit.core.keras.mixed_precision.configurable_weights_quantizer import \
     ConfigurableWeightsQuantizer
 
-from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
-    get_inferable_quantizer_kwargs
-
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
@@ -75,6 +71,7 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
                        n: common.BaseNode,
                        layer: Layer) -> Union[KerasQuantizationWrapper, Layer]:
         """
+
         A function which takes a computational graph node and a keras layer and perform the quantization
         wrapping for mixed precision.
 
@@ -82,40 +79,21 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
             n: A node of mct graph.
             layer: A keras layer
 
-        Returns:
-
-
+        Returns:
+            Wrapped layer with a configurable quantizer if the layer should be quantized in mixed precision, or the
+            layer as is.
 
+        Raises:
+            ValueError: if kernel attribute is quantized but not configurable.
         """
 
         kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
-        if kernel_attr is
-
-
-
-
-
-        # TODO: Do we want to include other quantized attributes that are not
-        # the kernel attribute in the mixed precision model?
-        # Currently, we only consider kernel attribute quantization (whether it is in mixed precision
-        # or single precision).
-        node_weights_qc = n.get_unique_weights_candidates(kernel_attr)
-        if not len(node_weights_qc) == 1:
-            Logger.critical(f"Expected a unique weights configuration for node {n.name}, but found {len(node_weights_qc)} configurations.")# pragma: no cover
-
-        weights_quant_cfg = node_weights_qc[0].weights_quantization_cfg
-        weights_quant_method = weights_quant_cfg.get_attr_config(kernel_attr).weights_quantization_method
-        quantier_for_node = get_inferable_quantizer_class(QuantizationTarget.Weights,
-                                                          weights_quant_method,
-                                                          BaseKerasInferableQuantizer)
-        kwargs = get_inferable_quantizer_kwargs(weights_quant_cfg,
-                                                QuantizationTarget.Weights,
-                                                kernel_attr)
-
-        return KerasQuantizationWrapper(layer,
-                                        weights_quantizers={kernel_attr: quantier_for_node(**kwargs)})
-
-        return layer
+        if kernel_attr is None or not n.is_weights_quantization_enabled(kernel_attr):
+            return layer
+        if not n.is_configurable_weight(kernel_attr):  # pragma: no cover
+            raise ValueError(f'Weight wrapper is not expected to be created for non-configurable weight of node {n}.')
+        wq = ConfigurableWeightsQuantizer(**self._get_weights_configurable_quantizer_kwargs(n, kernel_attr))
+        return KerasQuantizationWrapper(layer, weights_quantizers={kernel_attr: wq})
 
     def _get_weights_configurable_quantizer_kwargs(self, n: BaseNode, attr: str) -> Dict[str, Any]:
         """
@@ -147,50 +125,36 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
 
     def mixed_precision_activation_holder(self, n: BaseNode) -> KerasActivationQuantizationHolder:
         """
-
-
-        or an inferable quantizer for fixed single bit-width quantization.
+        Builds KerasActivationQuantizationHolder layer with a configurable quantizer for mixed precision for a node
+        with a configurable activation.
 
         Args:
            n: Node to get KerasActivationQuantizationHolder to attach in its output.
 
        Returns:
            A KerasActivationQuantizationHolder layer for the node activation quantization.
+
+        Raises:
+            ValueError: if node's activation is not configurable.
         """
+        if not n.has_configurable_activation():  # pragma: no cover
+            raise ValueError(f'Activation holder is not expected to be created for a non-configurable activation of '
+                             f'node {n}')
+        num_of_outputs = len(n.output_shape) if isinstance(n.output_shape, list) else 1
+        node_q_cfg_candidates = n.candidates_quantization_cfg
+
+        # sorting the candidates by kernel attribute weights number of bits first and then by
+        # activation number of bits (in reversed order).
+        # since only kernel attribute is quantized in weights mixed precision,
+        # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
+        n.sort_node_candidates(self.fw_info)
 
-
-
-        activation_quantizers = [
-
-
-
-        if n.name in activation_conf_nodes_names:
-            assert n.candidates_quantization_cfg is not None, f"Node {n.name} candidates_quantization_cfg is None"
-            node_q_cfg_candidates = n.candidates_quantization_cfg
-
-            # sorting the candidates by kernel attribute weights number of bits first and then by
-            # activation number of bits (in reversed order).
-            # since only kernel attribute is quantized in weights mixed precision,
-            # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
-            n.sort_node_candidates(self.fw_info)
-
-            max_candidate_idx = n.find_max_candidate_index()
-            kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
-            activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
-                                                                        'max_candidate_idx': max_candidate_idx,
-                                                                        'kernel_attr': kernel_attr})] \
-                                    * num_of_outputs
-        else:
-            node_act_qc = n.get_unique_activation_candidates()
-            assert len(node_act_qc) == 1, f"Expecting node {n.name} to have a unique activation configuration, " \
-                                          f"but {len(node_act_qc)} different configurations exist."
-            quantizer_for_node = get_inferable_quantizer_class(QuantizationTarget.Activation,
-                                                               node_act_qc[0].activation_quantization_cfg.activation_quantization_method,
-                                                               BaseKerasInferableQuantizer)
-            kwargs = get_inferable_quantizer_kwargs(node_act_qc[0].activation_quantization_cfg,
-                                                    QuantizationTarget.Activation)
-
-            activation_quantizers = [quantizer_for_node(**kwargs)] * num_of_outputs
+        max_candidate_idx = n.find_max_candidate_index()
+        kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
+        activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
+                                                                    'max_candidate_idx': max_candidate_idx,
+                                                                    'kernel_attr': kernel_attr})] \
+                                * num_of_outputs
 
         # Holder by definition uses a single quantizer for the activation quantization
         # thus we make sure this is the only possible case (unless it's a node with no activation
model_compression_toolkit/core/keras/keras_implementation.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from functools import partial
-from typing import List, Any, Tuple, Callable,
+from typing import List, Any, Tuple, Callable, Union, Generator
 
 import numpy as np
 import tensorflow as tf
@@ -22,7 +22,7 @@ from tensorflow.keras.models import Model
 
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
 from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
-from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode
+from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode
 from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.keras.hessian.activation_hessian_scores_calculator_keras import \
@@ -35,8 +35,6 @@ from model_compression_toolkit.exporter.model_wrapper.fw_agnostic.get_inferable_
 from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
     get_weights_quantizer_for_node, get_activations_quantizer_for_node
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
-from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import set_layer_to_bitwidth
 from model_compression_toolkit.core.common.similarity_analyzer import compute_kl_divergence, compute_cs, compute_mse
 from model_compression_toolkit.core.keras.constants import ACTIVATION, SOFTMAX, SIGMOID, ARGMAX, LAYER_NAME, \
     COMBINED_NMS
@@ -61,7 +59,7 @@ else:
     from keras.layers import Dense, Activation, Conv2D, DepthwiseConv2D, Conv2DTranspose, Concatenate, Add  # pragma: no cover
     from keras.layers.core import TFOpLambda  # pragma: no cover
 
-from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
+from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -95,7 +93,7 @@ from model_compression_toolkit.core.keras.graph_substitutions.substitutions.mult
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.scale_equalization import \
     ScaleEqualization, ScaleEqualizationWithPad, ScaleEqualizationMidActivation, ScaleEqualizationMidActivationWithPad
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.separableconv_decomposition import \
-    SeparableConvDecomposition
+    SeparableConvDecomposition
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.shift_negative_activation import \
     keras_apply_shift_negative_correction
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.dwconv_to_conv import DwconvToConv
@@ -110,9 +108,10 @@ class KerasImplementation(FrameworkImplementation):
     """
     A class with implemented methods to support optimizing Keras models.
     """
-
-
-
+    weights_quant_layer_cls = KerasQuantizationWrapper
+    activation_quant_layer_cls = KerasActivationQuantizationHolder
+    configurable_weights_quantizer_cls = ConfigurableWeightsQuantizer
+    configurable_activation_quantizer_cls = ConfigurableActivationQuantizer
 
     @property
     def constants(self):
@@ -401,42 +400,6 @@ class KerasImplementation(FrameworkImplementation):
             substitutions_list.append(keras_batchnorm_refusing())
         return substitutions_list
 
-    def get_sensitivity_evaluator(self,
-                                  graph: Graph,
-                                  quant_config: MixedPrecisionQuantizationConfig,
-                                  representative_data_gen: Callable,
-                                  fw_info: FrameworkInfo,
-                                  disable_activation_for_metric: bool = False,
-                                  hessian_info_service: HessianInfoService = None) -> SensitivityEvaluation:
-        """
-        Creates and returns an object which handles the computation of a sensitivity metric for a mixed-precision
-        configuration (comparing to the float model).
-
-        Args:
-            graph: Graph to build its float and mixed-precision models.
-            quant_config: QuantizationConfig of how the model should be quantized.
-            representative_data_gen: Dataset to use for retrieving images for the models inputs.
-            fw_info: FrameworkInfo object with information about the specific framework's model.
-            disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
-            hessian_info_service: HessianScoresService to fetch scores based on a Hessian-approximation for the float model.
-
-        Returns:
-            A SensitivityEvaluation object.
-        """
-
-        return SensitivityEvaluation(graph=graph,
-                                     quant_config=quant_config,
-                                     representative_data_gen=representative_data_gen,
-                                     fw_info=fw_info,
-                                     fw_impl=self,
-                                     set_layer_to_bitwidth=partial(set_layer_to_bitwidth,
-                                                                   weights_quantizer_type=ConfigurableWeightsQuantizer,
-                                                                   activation_quantizer_type=ConfigurableActivationQuantizer,
-                                                                   weights_quant_layer_type=KerasQuantizationWrapper,
-                                                                   activation_quant_layer_type=KerasActivationQuantizationHolder),
-                                     disable_activation_for_metric=disable_activation_for_metric,
-                                     hessian_info_service=hessian_info_service)
-
     def get_node_prior_info(self,
                             node: BaseNode,
                             fw_info: FrameworkInfo,
|