PyPI - mct-nightly - Versions diffs - 2.4.0.20250706.701__tar.gz → 2.4.0.20250708.612__tar.gz - Mend

mct-nightly 2.4.0.20250706.701.tar.gz → 2.4.0.20250708.612.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (543) hide show

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mct-nightly
-Version: 2.4.0.20250706.701
+Version: 2.4.0.20250708.612
 Summary: A Model Compression Toolkit for neural networks
 Author-email: ssi-dnn-dev@sony.com
 Classifier: Programming Language :: Python :: 3

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/mct_nightly.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mct-nightly
-Version: 2.4.0.20250706.701
+Version: 2.4.0.20250708.612
 Summary: A Model Compression Toolkit for neural networks
 Author-email: ssi-dnn-dev@sony.com
 Classifier: Programming Language :: Python :: 3

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/mct_nightly.egg-info/SOURCES.txt RENAMED Viewed

@@ -25,7 +25,6 @@ model_compression_toolkit/core/common/framework_info.py
 model_compression_toolkit/core/common/memory_computation.py
 model_compression_toolkit/core/common/model_builder_mode.py
 model_compression_toolkit/core/common/model_collector.py
-model_compression_toolkit/core/common/model_validation.py
 model_compression_toolkit/core/common/node_prior_info.py
 model_compression_toolkit/core/common/similarity_analyzer.py
 model_compression_toolkit/core/common/user_info.py
@@ -167,7 +166,6 @@ model_compression_toolkit/core/keras/custom_layer_validation.py
 model_compression_toolkit/core/keras/data_util.py
 model_compression_toolkit/core/keras/default_framework_info.py
 model_compression_toolkit/core/keras/keras_implementation.py
-model_compression_toolkit/core/keras/keras_model_validation.py
 model_compression_toolkit/core/keras/keras_node_prior_info.py
 model_compression_toolkit/core/keras/resource_utilization_data_facade.py
 model_compression_toolkit/core/keras/tf_tensor_numpy.py

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/__init__.py RENAMED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.4.0.20250706.000701"
+__version__ = "2.4.0.20250708.000612"

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/collectors/base_collector.py RENAMED Viewed

@@ -13,11 +13,12 @@
 # limitations under the License.
 # ==============================================================================
+from abc import ABC, abstractmethod
 import numpy as np
 from model_compression_toolkit.logger import Logger
-class BaseCollector(object):
+class BaseCollector(ABC):
     """
     Base class for statistics collection object.
     """
@@ -26,6 +27,7 @@ class BaseCollector(object):
         # When manipulation statistics in a granularity they were not collected by, the data is invalid.
         self.is_legal = True
+    @abstractmethod
     def scale(self, scale_factor: np.ndarray):
         """
         Scale all statistics in collector by some factor.
@@ -37,6 +39,7 @@ class BaseCollector(object):
         raise NotImplemented(
             f'{self.__class__.__name__} needs to implement scale operation for its state.')  # pragma: no cover
+    @abstractmethod
     def shift(self, shift_value: np.ndarray):
         """
         Shift all statistics in collector by some value.

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/collectors/mean_collector.py RENAMED Viewed

@@ -87,10 +87,13 @@ class MeanCollector(BaseCollector):
             x: Tensor that goes through the mean collector and needs to be considered in the mean computation.
         """
         self.i += 1  # Update the iteration index
-        axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
-        n = x.shape[axis]
-        transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
-        mu = np.mean(np.reshape(np.transpose(x, transpose_index), [n, -1]), axis=-1) # mean per channel for a batch
+        if self.axis is None:
+            mu = np.mean(np.reshape(x, [1, -1]), axis=-1)  # mean per channel for a batch
+        else:
+            axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
+            n = x.shape[axis]
+            transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
+            mu = np.mean(np.reshape(np.transpose(x, transpose_index), [n, -1]), axis=-1) # mean per channel for a batch
         self.current_sum += mu # sum of all batches
         self.current_mean = self.current_sum / self.i # mean of all batches

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py RENAMED Viewed

@@ -130,10 +130,13 @@ class MinMaxPerChannelCollector(BaseCollector):
             x: Tensor that goes through the collector and needs to be considered in the min/max computation.
         """
-        axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
-        n = x.shape[axis]
-        transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
-        x_reshape = np.reshape(np.transpose(x, transpose_index), [n, -1])
+        if self.axis is None:
+            x_reshape = np.reshape(x, [1, -1])
+        else:
+            axis = (len(x.shape) - 1) if self.axis == LAST_AXIS else self.axis
+            n = x.shape[axis]
+            transpose_index = [axis, *[i for i in range(len(x.shape)) if i != axis]]
+            x_reshape = np.reshape(np.transpose(x, transpose_index), [n, -1])
         if self.state is None:
             x_max = np.max(x_reshape, axis=-1)
             x_min = np.min(x_reshape, axis=-1)

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/model_collector.py RENAMED Viewed

@@ -57,19 +57,21 @@ def create_stats_collector_for_node(node: common.BaseNode,
 def create_tensor2node(graph: common.Graph,
-                       node: common.BaseNode):
+                       node: common.BaseNode,
+                       next_node_output_channel_axis: int):
     """
     Force statistic collector creation and assignment for a node.
     Args:
         graph: Graph of the node (for retrieving the current tensor).
         node: Node to create a tensor for.
+        next_node_output_channel_axis: channel output axis of next node.
     """
     current_sc = graph.get_out_stats_collector(node)
     is_list_nostat_collectors = isinstance(current_sc, list) and len(
         [sc for sc in current_sc if not isinstance(sc, common.NoStatsCollector)]) == 0
     if isinstance(current_sc, common.NoStatsCollector) or current_sc is None or is_list_nostat_collectors:
-        stats_collector = common.StatsCollector(node.out_channel_axis)
+        stats_collector = common.StatsCollector(next_node_output_channel_axis if node.out_channel_axis is None else node.out_channel_axis)
         graph.set_out_stats_collector_to_node(node, stats_collector)
@@ -157,6 +159,17 @@ class ModelCollector:
         for n in graph.get_topo_sorted_nodes():
             quant_node_in_fln = n.is_fln_quantization() and graph.fusing_info.is_quantized_node_in_fln(n)
             sc = create_stats_collector_for_node(n, quant_node_in_fln=quant_node_in_fln)  # Get static collector for the node
+            if isinstance(sc, common.StatsCollector) and (sc.mc.axis is None or sc.mpcc.axis is None):
+                # Missing output channel axis info, so try to extract it from previous and next nodes output channel axis.
+                possible_output_channel_axis_set = {nn.out_channel_axis for nn in graph.get_next_nodes(n) + graph.get_prev_nodes(n)}
+                # Filter out None values.
+                possible_output_channel_axis_list = list(filter(lambda x: x is not None, possible_output_channel_axis_set))
+                if len(possible_output_channel_axis_list) > 0:
+                    if len(possible_output_channel_axis_list) > 1:
+                        Logger.warning(f'Ambiguous input channel data from next nodes for {n.name}.')
+                    sc.mc.axis = possible_output_channel_axis_list[0]
+                    sc.mpcc.axis = possible_output_channel_axis_list[0]
             # If we use bias correction, and the node has kernel weights to quantize, we need to make sure
             # its previous nodes' tensors are consistent with this node.
             if qc.weights_bias_correction and n.kernel_attr is not None and n.is_weights_quantization_enabled(
@@ -164,7 +177,8 @@ class ModelCollector:
                 for ie in graph.incoming_edges(n):
                     input_node = ie.source_node
                     create_tensor2node(graph,
-                                       input_node)
+                                       input_node,
+                                       n.out_channel_axis)
             if sc is not None:
                 graph.set_out_stats_collector_to_node(n, sc)

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/pruning/memory_calculator.py RENAMED Viewed

@@ -303,7 +303,7 @@ class MemoryCalculator:
             num_oc = np.sum(output_mask)
         else:
             # Get the node channel axis from framework info
-            channel_axis = node.out_channel_axis
+            channel_axis = self.fw_impl.default_output_channel_axis if node.out_channel_axis is None else node.out_channel_axis
             if channel_axis is None:
                 Logger.critical(f"The channel axis is undefined. Please ensure the channel axis is explicitly defined for node {node.type} in the framework info.")

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/quantization/node_quantization_config.py RENAMED Viewed

@@ -18,7 +18,6 @@ from enum import Enum, auto
 from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.constants import POSITIONAL_ATTR
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import \
     AttributeQuantizationConfig, OpQuantizationConfig
@@ -41,6 +40,7 @@ class ActivationQuantizationMode(Enum):
     NO_QUANT = auto()
     FLN_NO_QUANT = auto()
 class BaseNodeQuantizationConfig(object):
     """
     Base class for node quantization configuration
@@ -59,12 +59,11 @@ class BaseNodeQuantizationConfig(object):
             kwargs: A dictionary with additional key arguments.
         """
         if hasattr(self, config_parameter_name):
             setattr(self, config_parameter_name, config_parameter_value)
         else:
-            Logger.warning(f"Parameter {config_parameter_name} could not be found in the node quantization config and "
-                           f"was not updated!")
+            raise AttributeError(
+                f"Parameter {config_parameter_name} could not be found in the node quantization config.")
     def __repr__(self) -> str:
         """
@@ -97,36 +96,11 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
         self.signedness = op_cfg.signedness
         self.activation_quantization_params = {}
-        # TODO irena: computed by compute_activation_bias_correction. shouldnt really be here
+        # TODO: computed by compute_activation_bias_correction. Probably shouldnt be here.
         self.activation_bias_correction_term = None
-        # TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
-        self.activation_error_method = None
-        self.relu_bound_to_power_of_2 = None
-        self.activation_channel_equalization = None
-        self.input_scaling = None
-        self.min_threshold = None
-        self.l_p_value = None
-        self.shift_negative_activation_correction = None
+        # Z-threshold is a global param from QuantizationConfig, however it can be overridden per node by NetworkEditor.
+        # Since activation qparams are re-computed in several places, it's easier to keep it here and update it once.
         self.z_threshold = None
-        self.shift_negative_ratio = None
-        self.shift_negative_threshold_recalculation = None
-        self.concat_threshold_update = None
-    def set_qc(self, qc: QuantizationConfig):
-        """ TODO irena: temporary keep all the attributes as before not to break all code at once.
-             Eventually all of them should be removed from here. """
-        self.activation_error_method = qc.activation_error_method
-        self.relu_bound_to_power_of_2 = qc.relu_bound_to_power_of_2
-        self.activation_channel_equalization = qc.activation_channel_equalization
-        self.input_scaling = qc.input_scaling
-        self.min_threshold = qc.min_threshold
-        self.l_p_value = qc.l_p_value
-        self.shift_negative_activation_correction = qc.shift_negative_activation_correction
-        self.z_threshold = qc.z_threshold
-        self.shift_negative_ratio = qc.shift_negative_ratio
-        self.shift_negative_threshold_recalculation = qc.shift_negative_threshold_recalculation
-        self.concat_threshold_update = qc.concat_threshold_update
     @property
     def enable_activation_quantization(self):
@@ -148,7 +122,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
             activation_params: Dictionary that contains weight quantization params.
         """
-        assert self.quant_mode == ActivationQuantizationMode.QUANT
+        assert self.quant_mode == ActivationQuantizationMode.QUANT or self.quant_mode == ActivationQuantizationMode.FLN_QUANT
         for param_name, param_value in activation_params.items():
             self.activation_quantization_params[param_name] = param_value
@@ -165,32 +139,16 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
         if not isinstance(other, NodeActivationQuantizationConfig):
             return False  # pragma: no cover
-        return self.activation_error_method == other.activation_error_method and \
-               self.activation_quantization_method == other.activation_quantization_method and \
+        return self.activation_quantization_method == other.activation_quantization_method and \
                self.activation_n_bits == other.activation_n_bits and \
                self.quant_mode == other.quant_mode and \
-               self.activation_channel_equalization == other.activation_channel_equalization and \
-               self.input_scaling == other.input_scaling and \
-               self.min_threshold == other.min_threshold and \
-               self.l_p_value == other.l_p_value and \
-               self.shift_negative_activation_correction == other.shift_negative_activation_correction and \
-               self.z_threshold == other.z_threshold and \
-               self.shift_negative_ratio == other.shift_negative_ratio and \
-               self.shift_negative_threshold_recalculation == other.shift_negative_threshold_recalculation
+               self.signedness == other.signedness
     def __hash__(self):
-        return hash((self.activation_error_method,
-                     self.activation_quantization_method,
+        return hash((self.activation_quantization_method,
                      self.activation_n_bits,
                      self.quant_mode,
-                     self.activation_channel_equalization,
-                     self.input_scaling,
-                     self.min_threshold,
-                     self.l_p_value,
-                     self.shift_negative_activation_correction,
-                     self.z_threshold,
-                     self.shift_negative_ratio,
-                     self.shift_negative_threshold_recalculation))
+                     self.signedness))
 class WeightsAttrQuantizationConfig:
@@ -211,16 +169,8 @@ class WeightsAttrQuantizationConfig:
         self.weights_n_bits = weights_attr_cfg.weights_n_bits
         self.weights_per_channel_threshold = weights_attr_cfg.weights_per_channel_threshold
         self.enable_weights_quantization = weights_attr_cfg.enable_weights_quantization
-        self.weights_quantization_params = {}
-        # TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
-        self.weights_error_method = None
-        self.l_p_value = None
-    def set_qc(self, qc: QuantizationConfig):
-        # TODO irena: temporary keep the fields to not break everything at once.
-        self.weights_error_method = qc.weights_error_method
-        self.l_p_value = qc.l_p_value
+        self.weights_quantization_params = {}
     def set_weights_quantization_param(self,
                                        weights_params: dict):
@@ -252,18 +202,14 @@ class WeightsAttrQuantizationConfig:
                self.weights_quantization_method == other.weights_quantization_method and \
                self.weights_n_bits == other.weights_n_bits and \
                self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
-               self.enable_weights_quantization == other.enable_weights_quantization and \
-               self.weights_error_method == other.weights_error_method and \
-               self.l_p_value == other.l_p_value
+               self.enable_weights_quantization == other.enable_weights_quantization
     def __hash__(self):
         return hash((self.weights_channels_axis,
-                     self.weights_error_method,
                      self.weights_quantization_method,
                      self.weights_n_bits,
                      self.weights_per_channel_threshold,
-                     self.enable_weights_quantization,
-                     self.l_p_value))
+                     self.enable_weights_quantization))
 class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
@@ -330,16 +276,14 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                 self.attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(weights_attr_cfg=attr_cfg,
                                                                                      weights_channels_axis=weights_channels_axis)
-        # TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
-        self.min_threshold = None
+        # TODO this is set by batch norm reconstruction substitution when folded batch norms are added back, to mark
+        #  the nodes that the correction should be applied to (for some nodes it gets disabled) and BNs removed.
+        #  The actual correction is only computed when it's applied in ptq, so it seems that both substitutions could
+        #  be unified, and no info need to pass between.
         self.weights_second_moment_correction = None
-        self.weights_bias_correction = None
-    def set_qc(self, qc: QuantizationConfig):
-        # TODO irena: temporary keep the fields to not break everything at once.
-        self.min_threshold = qc.min_threshold
-        self.weights_second_moment_correction = qc.weights_second_moment_correction
-        self.weights_bias_correction = qc.weights_bias_correction
+        # TODO: computed corrected bias is injected to the node config. Probably shouldn't be here. Also it can be
+        #  computed on the final config, instead of all candidates and then there is no need to save it at all.
+        self.bias_corrected = None
     def get_attr_config(self, attr_name: 'WeightAttrT') -> WeightsAttrQuantizationConfig:
         """
@@ -476,8 +420,8 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                 if hasattr(attr_cfg, config_parameter_name):
                     setattr(attr_cfg, config_parameter_name, config_parameter_value)
                 else:
-                    Logger.warning(f"Parameter {config_parameter_name} could not be found in the node quantization config of "
-                                   f"weights attribute {attr_name} and was not updated!")
+                    raise AttributeError(f"Parameter {config_parameter_name} could not be found in the node quantization config of "
+                                         f"weights attribute {attr_name}.")
             else:  # pragma: no cover
                 Logger.critical(f"Weights attribute {attr_name} could not be found to set parameter {config_parameter_name}.")
@@ -494,10 +438,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         if not isinstance(other, NodeWeightsQuantizationConfig):
             return False  # pragma: no cover
-        return self.min_threshold == other.min_threshold and \
-            self.simd_size == other.simd_size and \
-            self.weights_second_moment_correction == other.weights_second_moment_correction and \
-            self.weights_bias_correction == other.weights_bias_correction and \
+        return self.simd_size == other.simd_size and \
             self.attributes_config_mapping.keys() == other.attributes_config_mapping.keys() and \
             all([self.attributes_config_mapping[k] == other.attributes_config_mapping[k]
                  for k in self.attributes_config_mapping.keys()]) and \
@@ -506,9 +447,6 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                  for k in self.pos_attributes_config_mapping.keys()])
     def __hash__(self):
-        return hash((self.min_threshold,
-                     self.simd_size,
-                     self.weights_second_moment_correction,
-                     self.weights_bias_correction,
+        return hash((self.simd_size,
                      frozenset(self.attributes_config_mapping),
                      frozenset(self.pos_attributes_config_mapping)))

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/quantization/quantization_config.py RENAMED Viewed

@@ -90,7 +90,6 @@ class QuantizationConfig:
     shift_negative_activation_correction: bool = True
     activation_channel_equalization: bool = False
     z_threshold: float = math.inf
-    min_threshold: float = MIN_THRESHOLD
     l_p_value: int = 2
     linear_collapsing: bool = True
     residual_collapsing: bool = True

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py RENAMED Viewed

@@ -18,21 +18,25 @@ from typing import Dict, Union, Optional, Tuple, Callable
 from mct_quantizers import QuantizationMethod
 import model_compression_toolkit.core.common.quantization.quantization_params_generation as qpg
+from model_compression_toolkit.constants import MIN_THRESHOLD
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness
 from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationErrorMethod
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationErrorMethod, \
+    QuantizationConfig
-def compute_activation_qparams(activation_quant_cfg: NodeActivationQuantizationConfig,
+def compute_activation_qparams(quant_cfg: QuantizationConfig,
+                               node_activation_quant_cfg: NodeActivationQuantizationConfig,
                                node_prior_info: NodePriorInfo,
                                out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
     """
     Compute the activations params for a given node in a graph according to a params function.
     Args:
-        activation_quant_cfg: node's activation quantization configuration.
+        quant_cfg: quantization config.
+        node_activation_quant_cfg: node's activation quantization configuration.
         node_prior_info: Prior info collected for the node that is being quantized.
         out_stats_container: Tensor containing output statistics of the node.
@@ -40,41 +44,46 @@ def compute_activation_qparams(activation_quant_cfg: NodeActivationQuantizationC
         The computed activation quantization params.
     """
     activation_quantization_params_fn = _get_activation_quantization_params_fn(
-        activation_quant_cfg.activation_quantization_method, no_clipping=node_prior_info.is_output_bounded())
+        node_activation_quant_cfg.activation_quantization_method, no_clipping=node_prior_info.is_output_bounded())
     # Extract and filter histogram data from the statistics container.
-    bins_values, bins_counts = _get_histogram_data(activation_quant_cfg, out_stats_container)
+    z_threshold = quant_cfg.z_threshold
+    if node_activation_quant_cfg.z_threshold is not None:
+        z_threshold = node_activation_quant_cfg.z_threshold
+    bins_values, bins_counts = _get_histogram_data(out_stats_container,
+                                                   activation_error_method=quant_cfg.activation_error_method,
+                                                   z_threshold=z_threshold)
     # Retrieve the minimum and maximum values from the statistics container.
     min_value, max_value = out_stats_container.get_min_max_values()
     # Determine if the activations should be considered signed.
-    signed = _determine_signedness(activation_quant_cfg, node_prior_info, min_value, bins_values, bins_counts)
+    signed = _determine_signedness(node_activation_quant_cfg, node_prior_info, min_value, bins_values, bins_counts)
     # Compute and return the activation quantization parameters.
     return activation_quantization_params_fn(
         bins_values,
         bins_counts,
-        activation_quant_cfg.l_p_value,
-        activation_quant_cfg.activation_n_bits,
+        quant_cfg.l_p_value,
+        node_activation_quant_cfg.activation_n_bits,
         min_value,
         max_value,
-        min_threshold=activation_quant_cfg.min_threshold,
-        quant_error_method=activation_quant_cfg.activation_error_method,
+        min_threshold=MIN_THRESHOLD,
+        quant_error_method=quant_cfg.activation_error_method,
         is_signed=signed
     )
-def _get_histogram_data(
-    activation_quant_cfg: NodeActivationQuantizationConfig,
-    out_stats_container: BaseStatsCollector
-) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
+def _get_histogram_data(out_stats_container: BaseStatsCollector,
+                        activation_error_method: QuantizationErrorMethod,
+                        z_threshold: float) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
     """
     Extract and filter the histogram data from the statistics container.
     Args:
-        activation_quant_cfg: Node's activation quantization configuration.
         out_stats_container: Statistics container with histogram data.
+        activation_error_method: activation quantization error method.
+        z_threshold: z threshold for z-score filtering.
     Returns:
         A tuple containing the filtered bins_values and bins_counts.
@@ -83,12 +92,12 @@ def _get_histogram_data(
     # If the statistics container collected the histogram, we start by filtering outliers using z threshold
     # filtering, and then computing the threshold based on the filtered histogram.
     if out_stats_container.require_collection():
-        if activation_quant_cfg.activation_error_method == QuantizationErrorMethod.HMSE:
+        if activation_error_method == QuantizationErrorMethod.HMSE:
             bins_values, bins_counts = out_stats_container.weighted_hc.get_histogram()
         else:
             bins_values, bins_counts = out_stats_container.hc.get_histogram()
         bins_counts = qpg.z_score_filter(
-            activation_quant_cfg.z_threshold,
+            z_threshold,
             bins_values,
             bins_counts
         )

{mct_nightly-2.4.0.20250706.701 → mct_nightly-2.4.0.20250708.612}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py RENAMED Viewed

@@ -18,7 +18,7 @@ from tqdm import tqdm
 from typing import List, Callable, Generator
 from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
-from model_compression_toolkit.core import QuantizationErrorMethod
+from model_compression_toolkit.core import QuantizationErrorMethod, QuantizationConfig
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -31,29 +31,8 @@ from model_compression_toolkit.core.common.quantization.quantization_params_gene
 from model_compression_toolkit.logger import Logger
-def _collect_nodes_for_hmse(nodes_list: List[BaseNode], graph: Graph) -> List[BaseNode]:
-    """
-    Collects nodes that are compatiable for parameters selection search using HMSE,
-    that is, have a kernel attribute that is configured for HMSE error method.
-    Args:
-        nodes_list: A list of nodes to search quantization parameters for.
-        graph: Graph to compute its nodes' quantization parameters..
-    Returns: A (possibly empty) list of nodes.
-    """
-    hmse_nodes = []
-    for n in nodes_list:
-        if n.kernel_attr is not None and n.is_weights_quantization_enabled(n.kernel_attr) and \
-            all([c.weights_quantization_cfg.get_attr_config(n.kernel_attr).weights_error_method ==
-                 QuantizationErrorMethod.HMSE for c in n.candidates_quantization_cfg]):
-            hmse_nodes.append(n)
-    return hmse_nodes
 def calculate_quantization_params(graph: Graph,
+                                  quant_cfg: QuantizationConfig,
                                   fw_impl: FrameworkImplementation,
                                   repr_data_gen_fn: Callable[[], Generator],
                                   nodes: List[BaseNode] = None,
@@ -68,6 +47,7 @@ def calculate_quantization_params(graph: Graph,
     Args:
         graph: Graph to compute its nodes' thresholds.
+        quant_cfg: quantization config.
         fw_impl: FrameworkImplementation object.
         repr_data_gen_fn: callable returning representative dataset generator.
         nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph.
@@ -85,15 +65,16 @@ def calculate_quantization_params(graph: Graph,
     # Collecting nodes that are configured to search weights quantization parameters using HMSE optimization
     # and computing required Hessian information to be used for HMSE parameters selection.
     # The Hessian scores are computed and stored in the hessian_info_service object.
-    nodes_for_hmse = _collect_nodes_for_hmse(nodes_list, graph)
-    if len(nodes_for_hmse) > 0:
-        dataloader = fw_impl.convert_data_gen_to_dataloader(repr_data_gen_fn, batch_size=1)
-        request = HessianScoresRequest(mode=HessianMode.WEIGHTS,
-                                       granularity=HessianScoresGranularity.PER_ELEMENT,
-                                       data_loader=dataloader,
-                                       n_samples=num_hessian_samples,
-                                       target_nodes=nodes_for_hmse)
-        hessian_info_service.fetch_hessian(request)
+    if quant_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
+        nodes_for_hmse = [n for n in nodes_list if n.kernel_attr and n.is_weights_quantization_enabled(n.kernel_attr)]
+        if nodes_for_hmse:
+            dataloader = fw_impl.convert_data_gen_to_dataloader(repr_data_gen_fn, batch_size=1)
+            request = HessianScoresRequest(mode=HessianMode.WEIGHTS,
+                                           granularity=HessianScoresGranularity.PER_ELEMENT,
+                                           data_loader=dataloader,
+                                           n_samples=num_hessian_samples,
+                                           target_nodes=nodes_for_hmse)
+            hessian_info_service.fetch_hessian(request)
     for n in tqdm(nodes_list, "Calculating quantization parameters"):  # iterate only nodes that we should compute their thresholds
         for candidate_qc in n.candidates_quantization_cfg:
@@ -101,37 +82,34 @@ def calculate_quantization_params(graph: Graph,
                 if n.is_weights_quantization_enabled(attr):
                     # If the node's weights attribute should be quantized, we compute its quantization parameters
                     attr_cfg = candidate_qc.weights_quantization_cfg.get_attr_config(attr)
-                    channels_axis = attr_cfg.weights_channels_axis
-                    if channels_axis is not None:
-                        output_channels_axis = channels_axis[0]
-                    else:
-                        output_channels_axis = None
-                    mod_attr_cfg = attr_cfg
+                    output_channels_axis = attr_cfg.weights_channels_axis.output
-                    if attr_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
+                    weights_error_method = quant_cfg.weights_error_method
+                    if weights_error_method == QuantizationErrorMethod.HMSE:
                         # Although we collected nodes for HMSE before running the loop, we keep this verification to
                         # notify the user in case of HMSE configured for node that is not compatible for this method
                         if n.kernel_attr is None or n.kernel_attr not in attr:
                             Logger.warning(f"The HMSE error method for parameters selection is only supported for "
                                            f"kernel weights attributes. Running parameters selection for attribute "
                                            f"'{attr}' in node '{n.name}' with the default MSE error method instead.")
-                            mod_attr_cfg = copy.deepcopy(attr_cfg)
-                            mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
+                            weights_error_method = QuantizationErrorMethod.MSE
-                    min_threshold = candidate_qc.weights_quantization_cfg.min_threshold
                     weights_params, output_channels_axis = compute_weights_qparams(n.get_weights_by_keys(attr),
-                                                                                   mod_attr_cfg, output_channels_axis,
-                                                                                   min_threshold=min_threshold, node=n,
+                                                                                   attr_cfg,
+                                                                                   weights_error_method,
+                                                                                   quant_cfg.l_p_value,
+                                                                                   output_channels_axis,
+                                                                                   node=n,
                                                                                    hessian_info_service=hessian_info_service,
                                                                                    num_hessian_samples=num_hessian_samples)
                     attr_cfg.weights_channels_axis = ChannelAxisMapping(output_channels_axis, attr_cfg.weights_channels_axis.input)
                     attr_cfg.set_weights_quantization_param(weights_params)
-            if n.is_activation_quantization_enabled():
+            if n.is_activation_quantization_enabled() or n.is_fln_quantization():
                 # If node's activations should be quantized as well, we compute its activation quantization parameters
-                activation_params = compute_activation_qparams(
-                    activation_quant_cfg=candidate_qc.activation_quantization_cfg, node_prior_info=n.prior_info,
-                    out_stats_container=graph.get_out_stats_collector(n))
+                activation_params = compute_activation_qparams(quant_cfg=quant_cfg,
+                                                               node_activation_quant_cfg=candidate_qc.activation_quantization_cfg,
+                                                               node_prior_info=n.prior_info,
+                                                               out_stats_container=graph.get_out_stats_collector(n))
                 # Create a NodeQuantizationConfig containing all quantization params and attach it to the node
                 candidate_qc.activation_quantization_cfg.set_activation_quantization_param(activation_params)

mct-nightly 2.4.0.20250706.701__tar.gz → 2.4.0.20250708.612__tar.gz

mct-nightly 2.4.0.20250706.701tar.gz → 2.4.0.20250708.612tar.gz