PyPI - mct-nightly - Versions diffs - 2.4.0.20250630.629__py3-none-any.whl → 2.4.0.20250702.605__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250630.629py3-none-any.whl → 2.4.0.20250702.605py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76) hide show

model_compression_toolkit/core/common/quantization/node_quantization_config.py CHANGED Viewed

@@ -12,24 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Callable, Any, List, Tuple, Union, Dict, TYPE_CHECKING
+from typing import Any, List, Dict, TYPE_CHECKING
 from enum import Enum, auto
-import numpy as np
 from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
-from model_compression_toolkit.core.common.quantization.quantization_fn_selection import get_weights_quantization_fn
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.quantization.quantization_params_fn_selection import \
-    get_activation_quantization_params_fn, get_weights_quantization_params_fn
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig, \
-    QuantizationErrorMethod
-from model_compression_toolkit.target_platform_capabilities.constants import POS_ATTR
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
+from model_compression_toolkit.target_platform_capabilities.constants import POSITIONAL_ATTR
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import \
-    AttributeQuantizationConfig, \
-    OpQuantizationConfig
+    AttributeQuantizationConfig, OpQuantizationConfig
 if TYPE_CHECKING:
     from model_compression_toolkit.core.common.graph.base_node import WeightAttrT
@@ -86,29 +78,14 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
     """
     Attributes for configuring the quantization of the activations of a node.
     """
-    def __init__(self,
-                 qc: QuantizationConfig,
-                 op_cfg: OpQuantizationConfig,
-                 activation_quantization_fn: Callable,
-                 activation_quantization_params_fn: Callable
-                 ):
+    def __init__(self, op_cfg: OpQuantizationConfig):
         """
         Args:
-            qc: QuantizationConfig to create the node's config from.
             op_cfg: OpQuantizationConfig of the node with quantizers types to use when creating node quantization configuration.
-            activation_quantization_fn: Function to use when quantizing the node's activations.
-            activation_quantization_params_fn: Function to use when computing the threshold for quantizing a node's activations.
         """
-        self.activation_quantization_fn = activation_quantization_fn
-        self.activation_quantization_params_fn = activation_quantization_params_fn
-        self.activation_quantization_params = {}
         self.activation_quantization_method = op_cfg.activation_quantization_method
-        self.activation_error_method = qc.activation_error_method
         self.activation_n_bits = op_cfg.activation_n_bits
-        self.relu_bound_to_power_of_2 = qc.relu_bound_to_power_of_2
-        self.activation_bias_correction_term = None
         if op_cfg.enable_activation_quantization and op_cfg.quantization_preserving:
             raise ValueError("An OpQuantizationConfig can't have both enable_activation_quantization and quantization_preserving enabled.")
         if op_cfg.enable_activation_quantization:
@@ -118,6 +95,29 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
         else:
             self.quant_mode = ActivationQuantizationMode.NO_QUANT
         self.signedness = op_cfg.signedness
+        self.activation_quantization_params = {}
+        # TODO irena: computed by compute_activation_bias_correction. shouldnt really be here
+        self.activation_bias_correction_term = None
+        # TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
+        self.activation_error_method = None
+        self.relu_bound_to_power_of_2 = None
+        self.activation_channel_equalization = None
+        self.input_scaling = None
+        self.min_threshold = None
+        self.l_p_value = None
+        self.shift_negative_activation_correction = None
+        self.z_threshold = None
+        self.shift_negative_ratio = None
+        self.shift_negative_threshold_recalculation = None
+        self.concat_threshold_update = None
+    def set_qc(self, qc: QuantizationConfig):
+        """ TODO irena: temporary keep all the attributes as before not to break all code at once.
+             Eventually all of them should be removed from here. """
+        self.activation_error_method = qc.activation_error_method
+        self.relu_bound_to_power_of_2 = qc.relu_bound_to_power_of_2
         self.activation_channel_equalization = qc.activation_channel_equalization
         self.input_scaling = qc.input_scaling
         self.min_threshold = qc.min_threshold
@@ -139,65 +139,6 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
     def fln_quantization(self):
         return self.quant_mode == ActivationQuantizationMode.FLN_QUANT
-    def quantize_node_output(self,
-                             tensors: Any) -> Any:
-        """
-        Args:
-            tensors: framework tensor/s
-        Returns:
-            Framework tensor/s after applying fake quantization.
-        """
-        fake_quant = self.activation_quantization_fn(self.activation_n_bits,
-                                                     self.activation_quantization_params)
-        if fake_quant is None:
-            Logger.critical(
-                "Layer is intended to be quantized, but the fake_quant function is None.")  # pragma: no cover
-        return fake_quant(tensors)
-    @property
-    def activation_error_method(self) -> QuantizationErrorMethod:
-        """
-        activation_error_method getter.
-        """
-        return self._activation_error_method
-    @activation_error_method.setter
-    def activation_error_method(self, value: QuantizationErrorMethod):
-        """
-        activation_error_method setter.
-        Args:
-            value: New activation_error_method to set to the node activation configuration.
-        """
-        self._activation_error_method = value
-        self.activation_quantization_params_fn = get_activation_quantization_params_fn(activation_quantization_method=self.activation_quantization_method)
-    def set_activation_quantization_fn(self, activation_quantization_fn: Callable):
-        """
-        Sets activation quantization function for the node.
-        Args:
-            activation_quantization_fn: Function for quantazing the activations.
-        """
-        self.activation_quantization_fn = activation_quantization_fn
-    def set_activation_quantization_params_fn(self, activation_quantization_params_fn:Callable):
-        """
-        Sets activation params function for the node.
-        Args:
-            activation_quantization_params_fn: Function for calculating activation params.
-        """
-        self.activation_quantization_params_fn = activation_quantization_params_fn
     def set_activation_quantization_param(self,
                                           activation_params: dict):
         """
@@ -224,9 +165,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
         if not isinstance(other, NodeActivationQuantizationConfig):
             return False  # pragma: no cover
-        return self.activation_quantization_fn == other.activation_quantization_fn and \
-               self.activation_quantization_params_fn == other.activation_quantization_params_fn and \
-               self.activation_error_method == other.activation_error_method and \
+        return self.activation_error_method == other.activation_error_method and \
                self.activation_quantization_method == other.activation_quantization_method and \
                self.activation_n_bits == other.activation_n_bits and \
                self.quant_mode == other.quant_mode and \
@@ -240,9 +179,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
                self.shift_negative_threshold_recalculation == other.shift_negative_threshold_recalculation
     def __hash__(self):
-        return hash((self.activation_quantization_fn,
-                     self.activation_quantization_params_fn,
-                     self.activation_error_method,
+        return hash((self.activation_error_method,
                      self.activation_quantization_method,
                      self.activation_n_bits,
                      self.quant_mode,
@@ -261,65 +198,29 @@ class WeightsAttrQuantizationConfig:
     Configuration for quantizing a weights attribute of a node.
     """
     def __init__(self,
-                 qc: QuantizationConfig,
                  weights_attr_cfg: AttributeQuantizationConfig,
                  weights_channels_axis: ChannelAxisMapping = None):
         """
         Args:
-            qc: QuantizationConfig to create the node's config from.
             weights_attr_cfg: AttributeQuantizationConfig with parameters to use when creating the node's attribute quantization config.
             weights_channels_axis: Axis to quantize a node's attribute when quantizing per-channel (if not quantizing per-channel than expecting None).
         """
-        self.weights_quantization_fn = get_weights_quantization_fn(weights_attr_cfg.weights_quantization_method)
-        self.weights_quantization_params_fn = get_weights_quantization_params_fn(weights_attr_cfg.weights_quantization_method)
         self.weights_channels_axis = weights_channels_axis
-        self.weights_quantization_params = {}
         self.weights_quantization_method = weights_attr_cfg.weights_quantization_method
-        self.weights_error_method = qc.weights_error_method
         self.weights_n_bits = weights_attr_cfg.weights_n_bits
         self.weights_per_channel_threshold = weights_attr_cfg.weights_per_channel_threshold
         self.enable_weights_quantization = weights_attr_cfg.enable_weights_quantization
-        self.l_p_value = qc.l_p_value
-    @property
-    def weights_error_method(self) -> QuantizationErrorMethod:
-        """
-        weights_error_method getter.
-        """
-        return self._weights_error_method
-    @weights_error_method.setter
-    def weights_error_method(self, value: QuantizationErrorMethod):
-        """
-        weights_error_method setter.
-        Args:
-            value: New weights_error_method to set to the node weights configuration.
-        """
-        self._weights_error_method = value
-        self.weights_quantization_params_fn = get_weights_quantization_params_fn(weights_quantization_method=self.weights_quantization_method)
-    def set_weights_quantization_fn(self, weights_quantization_fn: Callable):
-        """
-        Sets weights quantization function for the node.
-        Args:
-            weights_quantization_fn: Function for quantazing the weights.
+        self.weights_quantization_params = {}
-        """
-        self.weights_quantization_fn = weights_quantization_fn
+        # TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
+        self.weights_error_method = None
+        self.l_p_value = None
-    def set_weights_quantization_params_fn(self, weights_quantization_params_fn: Callable):
-        """
-        Sets weights params function for the node.
-        Args:
-            weights_quantization_params_fn: Function for calculating the weights params.
-        """
-        self.weights_quantization_params_fn = weights_quantization_params_fn
+    def set_qc(self, qc: QuantizationConfig):
+        # TODO irena: temporary keep the fields to not break everything at once.
+        self.weights_error_method = qc.weights_error_method
+        self.l_p_value = qc.l_p_value
     def set_weights_quantization_param(self,
                                        weights_params: dict):
@@ -334,31 +235,6 @@ class WeightsAttrQuantizationConfig:
         for param_name, param_value in weights_params.items():
             self.weights_quantization_params[param_name] = param_value
-    def calculate_and_set_weights_params(self, tensor_data: np.ndarray, min_threshold: float):
-        """
-        Args:
-            tensor_data: Tensor content as Numpy array.
-            min_threshold: A minimal threshold to set as quantization parameter.
-        Returns:
-            Recalculated weights quantization params from the kernel and channel axis.
-        """
-        assert self.enable_weights_quantization
-        assert not (self.weights_per_channel_threshold and self.weights_channels_axis is None), \
-            "Trying to calculate threshold per channel, channel axis in None."
-        if self.weights_quantization_params_fn is not None:
-            self.set_weights_quantization_param(
-                self.weights_quantization_params_fn(tensor_data,
-                                                    p=self.l_p_value,
-                                                    n_bits=self.weights_n_bits,
-                                                    per_channel=self.weights_per_channel_threshold and self.weights_channels_axis is not None,
-                                                    channel_axis=self.weights_channels_axis.output,  # output channel axis
-                                                    min_threshold=min_threshold)[0]  # Take only first output, the q-params, as axis is already chosen.
-            )
-        else:
-            self.set_weights_quantization_param({})
     def __eq__(self, other: Any) -> bool:
         """
         Compares the object to another object to find if they are equal.
@@ -372,20 +248,16 @@ class WeightsAttrQuantizationConfig:
         if not isinstance(other, WeightsAttrQuantizationConfig):
             return False  # pragma: no cover
-        return self.weights_quantization_fn == other.weights_quantization_fn and \
-               self.weights_quantization_params_fn == other.weights_quantization_params_fn and \
-               self.weights_channels_axis == other.weights_channels_axis and \
-               self.weights_error_method == other.weights_error_method and \
+        return self.weights_channels_axis == other.weights_channels_axis and \
                self.weights_quantization_method == other.weights_quantization_method and \
                self.weights_n_bits == other.weights_n_bits and \
                self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
                self.enable_weights_quantization == other.enable_weights_quantization and \
+               self.weights_error_method == other.weights_error_method and \
                self.l_p_value == other.l_p_value
     def __hash__(self):
-        return hash((self.weights_quantization_fn,
-                     self.weights_quantization_params_fn,
-                     self.weights_channels_axis,
+        return hash((self.weights_channels_axis,
                      self.weights_error_method,
                      self.weights_quantization_method,
                      self.weights_n_bits,
@@ -399,23 +271,19 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
     Holding a mapping between the node's weights attributes and their quantization configurations,
     in addition to quantization parameters that are global for all attributes of the represented node.
     """
-    def __init__(self, qc: QuantizationConfig,
+    def __init__(self,
                  op_cfg: OpQuantizationConfig,
                  weights_channels_axis: ChannelAxisMapping,
                  node_attrs_list: List[str]):
         """
         Args:
-            qc: QuantizationConfig to create the node's config from.
             op_cfg: OpQuantizationConfig of the node with quantizers types to use when creating node quantization configuration.
             weights_channels_axis: Axis to quantize a node's weights attribute when quantizing per-channel.
             node_attrs_list: A list of the node's weights attributes names.
         """
-        self.min_threshold = qc.min_threshold
         self.simd_size = op_cfg.simd_size
-        self.weights_second_moment_correction = qc.weights_second_moment_correction
-        self.weights_bias_correction = qc.weights_bias_correction
         # Initialize a quantization configuration for each of the node's attributes
         self.attributes_config_mapping = {}
@@ -427,7 +295,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                 # POS_ATTR string. If none are found, it indicates that no specific quantization config is defined for
                 # positional weights, so the default config will be used instead.
                 attrs_included_in_name = {k: v for k, v in op_cfg.attr_weights_configs_mapping.items() if
-                                          POS_ATTR in k}
+                                          POSITIONAL_ATTR in k}
                 if len(attrs_included_in_name) > 1:  # pragma: no cover
                     raise ValueError(f"Found multiple attribute in FQC OpConfig that are contained "
@@ -443,8 +311,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                     attr_cfg = list(attrs_included_in_name.values())[0]
                 # Register this attribute under the positional attributes config mapping.
-                self.pos_attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(qc=qc,
-                                                                                         weights_attr_cfg=attr_cfg,
+                self.pos_attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(weights_attr_cfg=attr_cfg,
                                                                                          weights_channels_axis=
                                                                                          weights_channels_axis)
             else:
@@ -461,9 +328,18 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                 else:
                     attr_cfg = list(attrs_included_in_name.values())[0]
-                self.attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(qc=qc,
-                                                                                     weights_attr_cfg=attr_cfg,
+                self.attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(weights_attr_cfg=attr_cfg,
                                                                                      weights_channels_axis=weights_channels_axis)
+        # TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
+        self.min_threshold = None
+        self.weights_second_moment_correction = None
+        self.weights_bias_correction = None
+    def set_qc(self, qc: QuantizationConfig):
+        # TODO irena: temporary keep the fields to not break everything at once.
+        self.min_threshold = qc.min_threshold
+        self.weights_second_moment_correction = qc.weights_second_moment_correction
+        self.weights_bias_correction = qc.weights_bias_correction
     def get_attr_config(self, attr_name: 'WeightAttrT') -> WeightsAttrQuantizationConfig:
         """

model_compression_toolkit/core/common/quantization/quantization_fn_selection.py CHANGED Viewed

@@ -14,15 +14,35 @@
 # ==============================================================================
 from collections.abc import Callable
-from functools import partial
 from mct_quantizers import QuantizationMethod
+from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.quantization.quantizers.lut_kmeans_quantizer import lut_kmeans_quantizer
 from model_compression_toolkit.core.common.quantization.quantizers.uniform_quantizers import power_of_two_quantizer, \
     symmetric_quantizer, uniform_quantizer
+def get_activation_quantization_fn(activation_quantization_cfg: NodeActivationQuantizationConfig,
+                                   get_activation_quantization_fn_factory: Callable) -> Callable:
+    """
+    Get activation quantizer based on activation quantization configuration.
+    Args:
+        activation_quantization_cfg: activation quantization configuration.
+        get_activation_quantization_fn_factory: activation quantization functions factory.
+    Returns:
+        Activation quantizer that accepts a tensor and returns a quantized tensor.
+    """
+    quantizer_factory = get_activation_quantization_fn_factory(
+        activation_quantization_cfg.activation_quantization_method)
+    quantizer = quantizer_factory(activation_quantization_cfg.activation_n_bits,
+                                  activation_quantization_cfg.activation_quantization_params)
+    return quantizer
 def get_weights_quantization_fn(weights_quantization_method: QuantizationMethod) -> Callable:
     """
     Generate a function for weight quantization.

model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py CHANGED Viewed

@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from model_compression_toolkit.core.common.quantization.quantization_params_generation.power_of_two_selection import power_of_two_no_clipping_selection_min_max, \
-    power_of_two_selection_histogram, power_of_two_selection_tensor
-from model_compression_toolkit.core.common.quantization.quantization_params_generation.lut_kmeans_params import lut_kmeans_tensor
-from model_compression_toolkit.core.common.quantization.quantization_params_generation.symmetric_selection import symmetric_no_clipping_selection_min_max
-from model_compression_toolkit.core.common.quantization.quantization_params_generation.uniform_selection import uniform_no_clipping_selection_min_max
+from model_compression_toolkit.core.common.quantization.quantization_params_generation.power_of_two_selection import (
+    power_of_two_no_clipping_selection_min_max, power_of_two_selection_histogram, power_of_two_selection_tensor)
+from model_compression_toolkit.core.common.quantization.quantization_params_generation.lut_kmeans_params import (
+    lut_kmeans_tensor, lut_kmeans_histogram)
+from model_compression_toolkit.core.common.quantization.quantization_params_generation.symmetric_selection import (
+    symmetric_no_clipping_selection_min_max, symmetric_selection_histogram, symmetric_selection_tensor)
+from model_compression_toolkit.core.common.quantization.quantization_params_generation.uniform_selection import (
+    uniform_no_clipping_selection_min_max, uniform_selection_histogram, uniform_selection_tensor)
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.outlier_filter import z_score_filter

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py CHANGED Viewed

@@ -13,17 +13,59 @@
 # limitations under the License.
 # ==============================================================================
 import numpy as np
-from typing import Dict, Union, Optional, Tuple
+from typing import Dict, Union, Optional, Tuple, Callable
 from mct_quantizers import QuantizationMethod
-from model_compression_toolkit.core import QuantizationErrorMethod
+import model_compression_toolkit.core.common.quantization.quantization_params_generation as qpg
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness
 from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
-from model_compression_toolkit.core.common.quantization import quantization_params_generation
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationErrorMethod
+def compute_activation_qparams(activation_quant_cfg: NodeActivationQuantizationConfig,
+                               node_prior_info: NodePriorInfo,
+                               out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
+    """
+    Compute the activations params for a given node in a graph according to a params function.
+    Args:
+        activation_quant_cfg: node's activation quantization configuration.
+        node_prior_info: Prior info collected for the node that is being quantized.
+        out_stats_container: Tensor containing output statistics of the node.
+    Returns:
+        The computed activation quantization params.
+    """
+    activation_quantization_params_fn = _get_activation_quantization_params_fn(
+        activation_quant_cfg.activation_quantization_method, no_clipping=node_prior_info.is_output_bounded())
+    # Extract and filter histogram data from the statistics container.
+    bins_values, bins_counts = _get_histogram_data(activation_quant_cfg, out_stats_container)
+    # Retrieve the minimum and maximum values from the statistics container.
+    min_value, max_value = out_stats_container.get_min_max_values()
+    # Determine if the activations should be considered signed.
+    signed = _determine_signedness(activation_quant_cfg, node_prior_info, min_value, bins_values, bins_counts)
-def get_histogram_data(
+    # Compute and return the activation quantization parameters.
+    return activation_quantization_params_fn(
+        bins_values,
+        bins_counts,
+        activation_quant_cfg.l_p_value,
+        activation_quant_cfg.activation_n_bits,
+        min_value,
+        max_value,
+        min_threshold=activation_quant_cfg.min_threshold,
+        quant_error_method=activation_quant_cfg.activation_error_method,
+        is_signed=signed
+    )
+def _get_histogram_data(
     activation_quant_cfg: NodeActivationQuantizationConfig,
     out_stats_container: BaseStatsCollector
 ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
@@ -38,7 +80,6 @@ def get_histogram_data(
         A tuple containing the filtered bins_values and bins_counts.
     """
     bins_values, bins_counts = None, None
     # If the statistics container collected the histogram, we start by filtering outliers using z threshold
     # filtering, and then computing the threshold based on the filtered histogram.
     if out_stats_container.require_collection():
@@ -46,14 +87,15 @@ def get_histogram_data(
             bins_values, bins_counts = out_stats_container.weighted_hc.get_histogram()
         else:
             bins_values, bins_counts = out_stats_container.hc.get_histogram()
-        bins_counts = quantization_params_generation.z_score_filter(
+        bins_counts = qpg.z_score_filter(
             activation_quant_cfg.z_threshold,
             bins_values,
             bins_counts
         )
     return bins_values, bins_counts
-def determine_signedness(
+def _determine_signedness(
     activation_quant_cfg: NodeActivationQuantizationConfig,
     nodes_prior_info: NodePriorInfo,
     min_value: float,
@@ -83,73 +125,37 @@ def determine_signedness(
     return np.any(bins_values[:-1][bins_counts > 0] < 0)
-def update_activation_quantization_params_fn(
-        activation_quant_cfg: NodeActivationQuantizationConfig,
-        nodes_prior_info: NodePriorInfo):
-    """
-    Update the activation quantization parameters function based on the quantization method
-    and whether the node's output is bounded.
+_activation_quant_params_fns = {
+    QuantizationMethod.POWER_OF_TWO: qpg.power_of_two_selection_histogram,
+    QuantizationMethod.SYMMETRIC: qpg.symmetric_selection_histogram,
+    QuantizationMethod.UNIFORM: qpg.uniform_selection_histogram,
+    QuantizationMethod.LUT_POT_QUANTIZER: qpg.lut_kmeans_histogram
+}
+_activation_no_clipping_quant_params_fns = {
+    QuantizationMethod.POWER_OF_TWO: qpg.power_of_two_no_clipping_selection_min_max,
+    QuantizationMethod.SYMMETRIC: qpg.symmetric_no_clipping_selection_min_max,
+    QuantizationMethod.UNIFORM: qpg.uniform_no_clipping_selection_min_max,
+    QuantizationMethod.LUT_POT_QUANTIZER: qpg.lut_kmeans_histogram
+}
-    Args:
-        activation_quant_cfg: Node's activation quantization configuration.
-        nodes_prior_info: Prior info collected for the node that is being quantized.
-    """
-    if nodes_prior_info.is_output_bounded():
-        if activation_quant_cfg.activation_quantization_method == QuantizationMethod.POWER_OF_TWO:
-            activation_quant_cfg.set_activation_quantization_params_fn(
-                quantization_params_generation.power_of_two_no_clipping_selection_min_max
-            )
-        elif activation_quant_cfg.activation_quantization_method == QuantizationMethod.SYMMETRIC:
-            activation_quant_cfg.set_activation_quantization_params_fn(
-                quantization_params_generation.symmetric_no_clipping_selection_min_max
-            )
-        elif activation_quant_cfg.activation_quantization_method == QuantizationMethod.UNIFORM:
-            activation_quant_cfg.set_activation_quantization_params_fn(
-                quantization_params_generation.uniform_no_clipping_selection_min_max
-            )
-def get_activations_qparams(activation_quant_cfg: NodeActivationQuantizationConfig,
-                            nodes_prior_info: NodePriorInfo,
-                            out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
+def _get_activation_quantization_params_fn(activation_quantization_method: QuantizationMethod,
+                                           no_clipping: bool) -> Callable:
     """
-    Compute the activations params for a given node in a graph according to a params function.
+    Generate a function for finding activation quantization parameters.
     Args:
-        activation_quant_cfg: node's activation quantization configuration.
-        nodes_prior_info: Prior info collected for the node that is being quantized.
-        out_stats_container: Tensor containing output statistics of the node.
+        activation_quantization_method: Which quantization method to use for activations.
+        no_clipping: Whether to use the no-clipping version of the quantizer (if available).
     Returns:
-        The computed activation quantization params.
+        A function to find the quantization parameters.
     """
-    # Update quantization parameters function based on output bounds and quantization method.
-    update_activation_quantization_params_fn(activation_quant_cfg, nodes_prior_info)
-    # Extract and filter histogram data from the statistics container.
-    bins_values, bins_counts = get_histogram_data(activation_quant_cfg, out_stats_container)
-    # Retrieve the minimum and maximum values from the statistics container.
-    min_value, max_value = out_stats_container.get_min_max_values()
-    # Determine if the activations should be considered signed.
-    signed = determine_signedness(
-        activation_quant_cfg,
-        nodes_prior_info,
-        min_value,
-        bins_values,
-        bins_counts
-    )
-    # Compute and return the activation quantization parameters.
-    return activation_quant_cfg.activation_quantization_params_fn(
-        bins_values,
-        bins_counts,
-        activation_quant_cfg.l_p_value,
-        activation_quant_cfg.activation_n_bits,
-        min_value,
-        max_value,
-        min_threshold=activation_quant_cfg.min_threshold,
-        quant_error_method=activation_quant_cfg.activation_error_method,
-        is_signed=signed
-    )
+    if no_clipping:
+        params_fn = _activation_no_clipping_quant_params_fns.get(activation_quantization_method)
+    else:
+        params_fn = _activation_quant_params_fns.get(activation_quantization_method)
+    if params_fn is None:
+        raise ValueError(f"No parameter function found for the specified quantization method: "
+                         "{activation_quantization_method}")  # pragma: no cover
+    return params_fn

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py CHANGED Viewed

@@ -25,9 +25,9 @@ from model_compression_toolkit.core.common.framework_implementation import Frame
 from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
     HessianScoresGranularity
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_activations_computation \
-    import get_activations_qparams
+    import compute_activation_qparams
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_weights_computation import \
-    get_weights_qparams
+    compute_weights_qparams
 from model_compression_toolkit.logger import Logger
@@ -119,21 +119,19 @@ def calculate_quantization_params(graph: Graph,
                             mod_attr_cfg = copy.deepcopy(attr_cfg)
                             mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
-                    weights_params, output_channels_axis = get_weights_qparams(n.get_weights_by_keys(attr),
-                                                                               candidate_qc.weights_quantization_cfg,
-                                                                               mod_attr_cfg,
-                                                                               output_channels_axis,
-                                                                               node=n,
-                                                                               hessian_info_service=hessian_info_service,
-                                                                               num_hessian_samples=num_hessian_samples)
+                    min_threshold = candidate_qc.weights_quantization_cfg.min_threshold
+                    weights_params, output_channels_axis = compute_weights_qparams(n.get_weights_by_keys(attr),
+                                                                                   mod_attr_cfg, output_channels_axis,
+                                                                                   min_threshold=min_threshold, node=n,
+                                                                                   hessian_info_service=hessian_info_service,
+                                                                                   num_hessian_samples=num_hessian_samples)
                     attr_cfg.weights_channels_axis = ChannelAxisMapping(output_channels_axis, attr_cfg.weights_channels_axis.input)
                     attr_cfg.set_weights_quantization_param(weights_params)
             if n.is_activation_quantization_enabled():
                 # If node's activations should be quantized as well, we compute its activation quantization parameters
-                activation_params = get_activations_qparams(
-                    activation_quant_cfg=candidate_qc.activation_quantization_cfg,
-                    nodes_prior_info=n.prior_info,
+                activation_params = compute_activation_qparams(
+                    activation_quant_cfg=candidate_qc.activation_quantization_cfg, node_prior_info=n.prior_info,
                     out_stats_container=graph.get_out_stats_collector(n))
                 # Create a NodeQuantizationConfig containing all quantization params and attach it to the node
                 candidate_qc.activation_quantization_cfg.set_activation_quantization_param(activation_params)

mct-nightly 2.4.0.20250630.629__py3-none-any.whl → 2.4.0.20250702.605__py3-none-any.whl

mct-nightly 2.4.0.20250630.629py3-none-any.whl → 2.4.0.20250702.605py3-none-any.whl