mct-nightly 2.3.0.20250512.625__py3-none-any.whl → 2.3.0.20250513.611__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/METADATA +1 -1
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/RECORD +23 -23
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/framework_implementation.py +6 -33
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +22 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +8 -5
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +69 -58
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +82 -79
- model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py +32 -26
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +7 -0
- model_compression_toolkit/core/common/similarity_analyzer.py +1 -1
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +37 -73
- model_compression_toolkit/core/keras/keras_implementation.py +8 -45
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +7 -5
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +6 -5
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +46 -78
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +7 -9
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +12 -10
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +6 -41
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/WHEEL +0 -0
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 
 import numpy as np
 
@@ -70,17 +70,17 @@ class ConfigurableActivationQuantizer(BaseKerasInferableQuantizer):
         self.activation_quantizers = init_activation_quantizers(self.node_q_cfg)
         self.active_quantization_config_index = max_candidate_idx  # initialize with first config as default
 
-    def set_active_activation_quantizer(self, index: int):
+    def set_active_activation_quantizer(self, index: Optional[int]):
         """
        Set an index to use for the activation quantizer to return when requested.
 
        Args:
            index: Index of a candidate quantization configuration to use its quantized
-               version of the float weight.
+               version of the float weight, or None to disable quantization.
        """
 
-        assert index < len(self.node_q_cfg), f'Quantizer has {len(self.node_q_cfg)} ' \
-                                             f'possible nbits. Can not set index {index}'
+        assert index is None or index < len(self.node_q_cfg), f'Quantizer has {len(self.node_q_cfg)} ' \
+                                                              f'possible nbits. Can not set index {index}'
         self.active_quantization_config_index = index
 
     def __call__(self,
@@ -96,6 +96,8 @@ class ConfigurableActivationQuantizer(BaseKerasInferableQuantizer):
        Returns:
            Quantized activation tensor.
        """
+        if self.active_quantization_config_index is None:
+            return inputs.numpy()
         return self.activation_quantizers[self.active_quantization_config_index](inputs)
 
     def get_config(self) -> Dict[str, Any]: # pragma: no cover
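Taken together, these Keras hunks let `set_active_activation_quantizer(None)` pass the assertion and make `__call__` bypass quantization entirely. Below is a minimal standalone sketch of that pattern; `ToyConfigurableActivationQuantizer` and the fake quantizer lambdas are illustrative stand-ins, not the MCT class itself:

```python
from typing import Callable, List, Optional

import numpy as np


class ToyConfigurableActivationQuantizer:
    """Illustrative stand-in for a configurable activation quantizer with an optional bypass."""

    def __init__(self, quantizers: List[Callable[[np.ndarray], np.ndarray]], max_candidate_idx: int = 0):
        self.activation_quantizers = quantizers
        self.active_quantization_config_index: Optional[int] = max_candidate_idx

    def set_active_activation_quantizer(self, index: Optional[int]):
        # None now means "disable quantization" rather than tripping the range assert.
        assert index is None or index < len(self.activation_quantizers), \
            f'Quantizer has {len(self.activation_quantizers)} possible nbits. Can not set index {index}'
        self.active_quantization_config_index = index

    def __call__(self, inputs: np.ndarray) -> np.ndarray:
        if self.active_quantization_config_index is None:
            return inputs  # bypass: hand the float activation through untouched
        return self.activation_quantizers[self.active_quantization_config_index](inputs)


# Two fake candidates ("8-bit" and "4-bit") just to exercise the index switch and the None bypass.
quant = ToyConfigurableActivationQuantizer(
    quantizers=[lambda x: np.round(x * 127) / 127, lambda x: np.round(x * 7) / 7])
x = np.array([0.1, 0.49, 0.9])
quant.set_active_activation_quantizer(1)     # pick the 4-bit candidate
print(quant(x))
quant.set_active_activation_quantizer(None)  # disable quantization entirely
print(quant(x))                              # returns x as-is
```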
model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from functools import partial
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
 
 from model_compression_toolkit.core.common.mixed_precision.configurable_quantizer_utils import \
     verify_candidates_descending_order, init_quantized_weights
@@ -88,17 +88,17 @@ class ConfigurableWeightsQuantizer(BaseKerasInferableQuantizer):
         self.active_quantization_config_index = self.max_candidate_idx
 
     def set_weights_bit_width_index(self,
-                                    index: int):
+                                    index: Optional[int]):
        """
        Change the "active" bitwidth index the configurable quantizer uses, so a different quantized weight
        will be used.
 
        Args:
-           index: Quantization configuration candidate index to use.
+           index: Quantization configuration candidate index to use, or None to disable quantization.
 
        """
 
-        if index >= len(self.node_q_cfg):
+        if index and index >= len(self.node_q_cfg):
            Logger.critical(f'Quantizer supports only {len(self.node_q_cfg)} bit width configurations; index {index} is out of range.')# pragma: no cover
        self.active_quantization_config_index = index
 
@@ -118,7 +118,8 @@ class ConfigurableWeightsQuantizer(BaseKerasInferableQuantizer):
            specific quantization configuration candidate (the candidate's index is the
            index that is in active_quantization_config_index the quantizer holds).
        """
-
+        if self.active_quantization_config_index is None:
+            return self.float_weights
         return self.quantized_weights[self.active_quantization_config_index]
 
     def get_config(self) -> Dict[str, Any]: # pragma: no cover
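The Keras weights quantizer gets the same treatment: `None` now means "serve the stored float weights". The shipped check uses a truthiness test (`if index and index >= ...`); the sketch below uses an explicit `index is not None` comparison for readability. Again, this is an illustrative stand-in rather than the MCT class:

```python
from typing import List, Optional

import numpy as np


class ToyConfigurableWeightsQuantizer:
    """Illustrative stand-in: holds pre-quantized weight candidates plus the original float weights."""

    def __init__(self, float_weights: np.ndarray, quantized_weights: List[np.ndarray]):
        self.float_weights = float_weights
        self.quantized_weights = quantized_weights
        self.active_quantization_config_index: Optional[int] = 0

    def set_weights_bit_width_index(self, index: Optional[int]):
        # Only validate the range when an index is actually given; None disables quantization.
        if index is not None and index >= len(self.quantized_weights):
            raise ValueError(f'Only {len(self.quantized_weights)} candidates; index {index} is out of range.')
        self.active_quantization_config_index = index

    def __call__(self) -> np.ndarray:
        if self.active_quantization_config_index is None:
            return self.float_weights  # bypass: return the original float kernel
        return self.quantized_weights[self.active_quantization_config_index]


w = np.array([0.3, -0.7])
q = ToyConfigurableWeightsQuantizer(float_weights=w, quantized_weights=[np.round(w * 127) / 127])
q.set_weights_bit_width_index(None)
assert q() is w  # float weights returned untouched when quantization is disabled
```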
model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py

@@ -16,26 +16,18 @@
 from typing import List, Any, Tuple, Union, Dict
 
 import torch
-from mct_quantizers import PytorchQuantizationWrapper,
-    PytorchActivationQuantizationHolder
+from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
 from mct_quantizers.common.constants import ACTIVATION_HOLDER_QUANTIZER
-from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
-from mct_quantizers.pytorch.quantizers import BasePyTorchInferableQuantizer
 
-from model_compression_toolkit.core import FrameworkInfo
-from model_compression_toolkit.core import common
+from model_compression_toolkit.core import FrameworkInfo, common
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
-
 from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.mixed_precision.configurable_activation_quantizer import \
     ConfigurableActivationQuantizer
 from model_compression_toolkit.core.pytorch.mixed_precision.configurable_weights_quantizer import \
     ConfigurableWeightsQuantizer
-
-from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.node_to_quantizer import \
-    get_weights_inferable_quantizer_kwargs, get_activation_inferable_quantizer_kwargs
 from model_compression_toolkit.logger import Logger
 
 
@@ -77,44 +69,25 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
            n: A node of mct graph.
            layer: A pytorch layer
 
-        Returns:
-
-
+        Returns:
+            Wrapped layer with a configurable quantizer if the layer should be quantized in mixed precision, or the
+            layer as is.
 
+        Raises:
+            ValueError: if kernel attribute is quantized but not configurable.
        """
 
-        weights_conf_nodes_names = [n.name for n in self.graph.get_weights_configurable_nodes(self.fw_info)]
         kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
-        if kernel_attr is
-
-
-
-
-
-
-
-
-
-        # TODO: Do we want to include other quantized attributes that are not
-        # the kernel attribute in the mixed precision model?
-        # Currently, we only consider kernel attribute quantization (whether it is in mixed precision
-        # or single precision).
-        node_weights_qc = n.get_unique_weights_candidates(kernel_attr)
-        if not len(node_weights_qc) == 1:
-            Logger.critical(f"Expected a single weights quantization configuration for node '{n.name}', but found ({len(node_weights_qc)}) configurations.")# pragma: no cover
-
-        quantier_for_node = get_inferable_quantizer_class(QuantizationTarget.Weights,
-                                                          node_weights_qc[0].weights_quantization_cfg
-                                                          .get_attr_config(kernel_attr)
-                                                          .weights_quantization_method,
-                                                          BasePyTorchInferableQuantizer)
-        kwargs = get_weights_inferable_quantizer_kwargs(node_weights_qc[0].weights_quantization_cfg,
-                                                        kernel_attr)
-
-        return PytorchQuantizationWrapper(layer,
-                                          weights_quantizers={kernel_attr: quantier_for_node(**kwargs)})
-
-        return layer
+        if kernel_attr is None or not n.is_weights_quantization_enabled(kernel_attr):
+            return layer
+        if not n.is_configurable_weight(kernel_attr):  # pragma: no cover
+            raise ValueError(f'Weight wrapper is not expected to be created for non-configurable weight of node {n}.')
+        return PytorchQuantizationWrapper(layer,
+                                          weights_quantizers={
+                                              kernel_attr: ConfigurableWeightsQuantizer(
+                                                  **self._get_weights_configurable_quantizer_kwargs(n,
                                                                                                      kernel_attr),
                                                  kernel_attr=kernel_attr)})
 
     def _get_weights_configurable_quantizer_kwargs(self, n: BaseNode, attr: str) -> Dict[str, Any]:
        """
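The rewritten weight-wrapping path drops the single-precision inferable-quantizer branch: a layer is now either returned untouched (no kernel attribute, or weights quantization disabled) or wrapped with a ConfigurableWeightsQuantizer, and a quantized-but-non-configurable kernel is treated as an error. A hedged sketch of that decision flow, with the node queries abstracted into callables (the real checks are methods on MCT's BaseNode; the function name and parameters here are hypothetical):

```python
from typing import Any, Callable, Optional


def wrap_layer_for_mixed_precision(layer: Any,
                                   kernel_attr: Optional[str],
                                   weights_quantization_enabled: Callable[[str], bool],
                                   is_configurable_weight: Callable[[str], bool],
                                   make_wrapper: Callable[[Any, str], Any]) -> Any:
    """Sketch of the new wrapping decision: pass through, fail fast, or wrap with a configurable quantizer."""
    if kernel_attr is None or not weights_quantization_enabled(kernel_attr):
        return layer                         # nothing to quantize -> keep the layer as-is
    if not is_configurable_weight(kernel_attr):
        raise ValueError('Weight wrapper is not expected for a non-configurable weight.')
    return make_wrapper(layer, kernel_attr)  # e.g. PytorchQuantizationWrapper + ConfigurableWeightsQuantizer
```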
@@ -145,9 +118,8 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
 
     def mixed_precision_activation_holder(self, n: BaseNode, holder_type: PytorchActivationQuantizationHolder = PytorchActivationQuantizationHolder) -> PytorchActivationQuantizationHolder:
        """
-
-
-        or an inferable quantizer for fixed single bit-width quantization.
+        Builds PytorchActivationQuantizationHolder layer with a configurable quantizer for mixed precision for a node
+        with a configurable activation.
 
        Args:
            n: Node to get PytorchActivationQuantizationHolder to attach in its output.
@@ -155,39 +127,35 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
 
        Returns:
            A PytorchActivationQuantizationHolder layer for the node activation quantization.
+
+        Raises:
+            ValueError: if node's activation is not configurable.
        """
+        if holder_type != PytorchActivationQuantizationHolder:  # pragma: no cover
+            raise TypeError(f'Expected PytorchActivationQuantizationHolder holder type for mixed precision, got'
+                            f'{holder_type}')
+
+        if not n.has_configurable_activation():  # pragma: no cover
+            raise ValueError(f'Activation holder is not expected to be created for a non-configurable activation of '
+                             f'node {n}')
+
+        num_of_outputs = len(n.output_shape) if isinstance(n.output_shape, list) else 1
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        max_candidate_idx = n.find_max_candidate_index()
-
-        kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
-        activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
-                                                                    'max_candidate_idx': max_candidate_idx,
-                                                                    'kernel_attr': kernel_attr})] \
-                                * num_of_outputs
-        else:
-            node_act_qc = n.get_unique_activation_candidates()
-            assert len(node_act_qc) == 1, f"Expected a single activation configuration for node '{n.name}', but found multiple ({len(node_act_qc)}) configurations."
-            quantizer_for_node = get_inferable_quantizer_class(QuantizationTarget.Activation,
-                                                               node_act_qc[0].activation_quantization_cfg.activation_quantization_method,
-                                                               BasePyTorchInferableQuantizer)
-            kwargs = get_activation_inferable_quantizer_kwargs(node_act_qc[0].activation_quantization_cfg)
-
-            activation_quantizers = [quantizer_for_node(**kwargs)] * num_of_outputs
+        node_q_cfg_candidates = n.candidates_quantization_cfg
+
+        # sorting the candidates by kernel attribute weights number of bits first and then by
+        # activation number of bits (in reversed order).
+        # since only kernel attribute is quantized in weights mixed precision,
+        # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
+        n.sort_node_candidates(self.fw_info)
+
+        max_candidate_idx = n.find_max_candidate_index()
+
+        kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
+        activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
+                                                                    'max_candidate_idx': max_candidate_idx,
+                                                                    'kernel_attr': kernel_attr})] \
+                                * num_of_outputs
 
         # Holder by definition uses a single quantizer for the activation quantization
         # thus we make sure this is the only possible case (unless it's a node with no activation
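Similarly, the activation-holder path now assumes a configurable activation and always builds a ConfigurableActivationQuantizer, replicated once per node output. A small sketch of just the replication step; the factory callable and output-shape handling are simplified assumptions, not MCT APIs:

```python
from typing import Any, Callable, List


def build_activation_quantizers(make_quantizer: Callable[[], Any], output_shape: Any) -> List[Any]:
    """Sketch: one configurable activation quantizer instance, shared across the node's outputs."""
    num_of_outputs = len(output_shape) if isinstance(output_shape, list) else 1
    # The diff replicates the same quantizer object with list multiplication; the holder ultimately
    # keeps a single quantizer, so the builder later verifies the list collapses to one entry.
    return [make_quantizer()] * num_of_outputs
```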
model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import
+from typing import List, Optional
 
 from model_compression_toolkit.core.common.mixed_precision.configurable_quant_id import ConfigurableQuantizerIdentifier
 from model_compression_toolkit.core.common.mixed_precision.configurable_quantizer_utils import \
@@ -70,18 +70,15 @@ class ConfigurableActivationQuantizer(BasePyTorchInferableQuantizer):
         self.activation_quantizers = init_activation_quantizers(self.node_q_cfg)
         self.active_quantization_config_index = max_candidate_idx  # initialize with first config as default
 
-    def set_active_activation_quantizer(self,
-                                        index: int):
+    def set_active_activation_quantizer(self, index: Optional[int]):
        """
        Set an activation quantizer to use by the layer wrapped by the module.
 
        Args:
-           index: Index of a candidate quantization configuration to use
-               for quantizing the activation.
+           index: Index of a candidate quantization configuration to use, or None to disable quantization.
        """
-
-
-                                             f'possible nbits. Can not set index {index}'
+        assert index is None or index < len(self.node_q_cfg), (f'Quantizer has {len(self.node_q_cfg)} possible nbits. '
+                                                               f'Can not set index {index}')
         self.active_quantization_config_index = index
 
     def __call__(self,
@@ -97,5 +94,6 @@ class ConfigurableActivationQuantizer(BasePyTorchInferableQuantizer):
        Returns:
            Quantized activation tensor.
        """
-
+        if self.active_quantization_config_index is None:
+            return inputs
         return self.activation_quantizers[self.active_quantization_config_index](inputs)
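The PyTorch activation quantizer mirrors the Keras change, except that the bypass returns the input tensor directly rather than a NumPy array. An illustrative torch module sketch (a stand-in, not the MCT class):

```python
from typing import Callable, List, Optional

import torch


class ToyTorchActivationQuantizer(torch.nn.Module):
    """Illustrative stand-in: the PyTorch variant returns the input tensor itself when disabled."""

    def __init__(self, quantizers: List[Callable[[torch.Tensor], torch.Tensor]], max_candidate_idx: int = 0):
        super().__init__()
        self.activation_quantizers = quantizers
        self.active_quantization_config_index: Optional[int] = max_candidate_idx

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        if self.active_quantization_config_index is None:
            return inputs  # no conversion or detach needed for activations; the tensor flows through unchanged
        return self.activation_quantizers[self.active_quantization_config_index](inputs)
```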
model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import
+from typing import List, Optional
 
 from model_compression_toolkit.core.common.mixed_precision.configurable_quant_id import ConfigurableQuantizerIdentifier
 from model_compression_toolkit.core.common.mixed_precision.configurable_quantizer_utils import \
@@ -87,20 +87,18 @@ class ConfigurableWeightsQuantizer(BasePyTorchInferableQuantizer):
 
         self.active_quantization_config_index = self.max_candidate_idx
 
-    def set_weights_bit_width_index(self,
-                                    index: int):
+    def set_weights_bit_width_index(self, index:Optional[int]):
        """
        Change the "active" bitwidth index the configurable quantizer uses, so a different quantized weight
        will be used.
 
        Args:
-           index: Quantization configuration candidate index to use.
+           index: Quantization configuration candidate index to use, or None to disable quantization.
 
        """
 
-        assert index < len(self.node_q_cfg), \
-            f'Quantizer has {len(self.node_q_cfg)} '
-            f'possible nbits. Can not set index {index}'
+        assert index is None or index < len(self.node_q_cfg), \
+            f'Quantizer has {len(self.node_q_cfg)} possible nbits. Can not set index {index}'
         self.active_quantization_config_index = index
 
     def __call__(self,
@@ -112,12 +110,16 @@ class ConfigurableWeightsQuantizer(BasePyTorchInferableQuantizer):
        to the current active_quantization_config_index.
 
        Args:
-           inputs: Input tensor (
+           inputs: Input tensor (only used if quantization is disabled).
 
        Returns:
            Quantized weight, that was quantized using number of bits that is in a
            specific quantization configuration candidate (the candidate's index is the
-           index that is in active_quantization_config_index the quantizer holds)
+           index that is in active_quantization_config_index the quantizer holds),
+           or detached input if quantization is disabled.
        """
-
+        if self.active_quantization_config_index is None:
+            # Note: must be detached, otherwise quantization wrapper will inject it back as a Parameter to the
+            # underlying layer, which then causes crash during inference next time the quantizer is enabled
+            return inputs.detach()
         return self.quantized_weights[self.active_quantization_config_index]
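For weights, the PyTorch bypass returns `inputs.detach()`; per the in-diff comment, handing back the live tensor would let the quantization wrapper re-register it as a Parameter on the underlying layer and crash once the quantizer is re-enabled. A toy sketch of that call path (illustrative class and usage, not the MCT implementation):

```python
from typing import List, Optional

import torch


class ToyTorchWeightsQuantizer:
    """Illustrative stand-in for the PyTorch configurable weights quantizer bypass."""

    def __init__(self, quantized_weights: List[torch.Tensor]):
        self.quantized_weights = quantized_weights
        self.active_quantization_config_index: Optional[int] = 0

    def __call__(self, float_weight: torch.Tensor) -> torch.Tensor:
        if self.active_quantization_config_index is None:
            # Returning the raw parameter would hand the wrapper a tensor that still requires grad and
            # carries autograd history; detach() hands back a plain tensor instead.
            return float_weight.detach()
        return self.quantized_weights[self.active_quantization_config_index]


w = torch.nn.Parameter(torch.randn(3))
q = ToyTorchWeightsQuantizer(quantized_weights=[torch.round(w.detach() * 8) / 8])
q.active_quantization_config_index = None
out = q(w)
assert not out.requires_grad  # plain tensor, safe for the wrapper to reassign
```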
model_compression_toolkit/core/pytorch/pytorch_implementation.py

@@ -26,14 +26,12 @@ from torch.nn import Module, Sigmoid, Softmax
 
 import model_compression_toolkit.core.pytorch.constants as pytorch_constants
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
-from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
+from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
-from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
-from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import set_layer_to_bitwidth
+from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode
 from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.similarity_analyzer import compute_mse, compute_kl_divergence, compute_cs
@@ -112,6 +110,10 @@ class PytorchImplementation(FrameworkImplementation):
     """
     A class with implemented methods to support optimizing Pytorch models.
     """
+    weights_quant_layer_cls = PytorchQuantizationWrapper,
+    activation_quant_layer_cls = PytorchActivationQuantizationHolder
+    configurable_weights_quantizer_cls = ConfigurableWeightsQuantizer
+    configurable_activation_quantizer_cls = ConfigurableActivationQuantizer
 
     def __init__(self):
         super().__init__()
@@ -397,43 +399,6 @@ class PytorchImplementation(FrameworkImplementation):
         substitutions_list.append(pytorch_batchnorm_refusing())
         return substitutions_list
 
-    def get_sensitivity_evaluator(self,
-                                  graph: Graph,
-                                  quant_config: MixedPrecisionQuantizationConfig,
-                                  representative_data_gen: Callable,
-                                  fw_info: FrameworkInfo,
-                                  disable_activation_for_metric: bool = False,
-                                  hessian_info_service: HessianInfoService = None
-                                  ) -> SensitivityEvaluation:
-        """
-        Creates and returns an object which handles the computation of a sensitivity metric for a mixed-precision
-        configuration (comparing to the float model).
-
-        Args:
-            graph: Graph to build its float and mixed-precision models.
-            quant_config: QuantizationConfig of how the model should be quantized.
-            representative_data_gen: Dataset to use for retrieving images for the models inputs.
-            fw_info: FrameworkInfo object with information about the specific framework's model.
-            disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
-            hessian_info_service: HessianScoresService to fetch approximations of the hessian scores for the float model.
-
-        Returns:
-            A SensitivityEvaluation object.
-        """
-
-        return SensitivityEvaluation(graph=graph,
-                                     quant_config=quant_config,
-                                     representative_data_gen=representative_data_gen,
-                                     fw_info=fw_info,
-                                     fw_impl=self,
-                                     set_layer_to_bitwidth=partial(set_layer_to_bitwidth,
-                                                                   weights_quantizer_type=ConfigurableWeightsQuantizer,
-                                                                   activation_quantizer_type=ConfigurableActivationQuantizer,
-                                                                   weights_quant_layer_type=PytorchQuantizationWrapper,
-                                                                   activation_quant_layer_type=PytorchActivationQuantizationHolder),
-                                     disable_activation_for_metric=disable_activation_for_metric,
-                                     hessian_info_service=hessian_info_service)
-
     def get_node_prior_info(self,
                             node: BaseNode,
                             fw_info: FrameworkInfo,
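With `get_sensitivity_evaluator` deleted here (and framework_implementation.py trimmed by 33 lines per the file list), the new class attributes suggest the sensitivity-evaluation wiring moved into shared base-class code that reads these attributes instead of each framework duplicating the partial-binding boilerplate. The sketch below is an assumption about how such wiring could look, not the actual MCT base-class implementation; `FrameworkImplementationSketch` and `make_set_layer_to_bitwidth` are hypothetical names:

```python
from functools import partial
from typing import Any, Callable, Type


class FrameworkImplementationSketch:
    """Hypothetical base-class sketch that consumes the new per-framework class attributes."""
    weights_quant_layer_cls: Type = None
    activation_quant_layer_cls: Type = None
    configurable_weights_quantizer_cls: Type = None
    configurable_activation_quantizer_cls: Type = None

    def make_set_layer_to_bitwidth(self, set_layer_to_bitwidth: Callable[..., Any]) -> Callable[..., Any]:
        # The per-framework classes are bound once here, so Keras and PyTorch no longer need
        # duplicated get_sensitivity_evaluator overrides that only differed in these four types.
        return partial(set_layer_to_bitwidth,
                       weights_quantizer_type=self.configurable_weights_quantizer_cls,
                       activation_quantizer_type=self.configurable_activation_quantizer_cls,
                       weights_quant_layer_type=self.weights_quant_layer_cls,
                       activation_quant_layer_type=self.activation_quant_layer_cls)
```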