PyPI - mct-nightly - Versions diffs - 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (169) hide show

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py CHANGED Viewed

@@ -13,98 +13,47 @@
 # limitations under the License.
 # ==============================================================================
 import numpy as np
-from typing import Dict, Union, Optional, Tuple, Callable
+from typing import Dict, Union, Optional, Tuple
 from mct_quantizers import QuantizationMethod
-import model_compression_toolkit.core.common.quantization.quantization_params_generation as qpg
-from model_compression_toolkit.constants import MIN_THRESHOLD
+from model_compression_toolkit.core import QuantizationErrorMethod
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness
 from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
+from model_compression_toolkit.core.common.quantization import quantization_params_generation
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationErrorMethod, \
-    QuantizationConfig
-def compute_activation_qparams(quant_cfg: QuantizationConfig,
-                               node_activation_quant_cfg: NodeActivationQuantizationConfig,
-                               node_prior_info: NodePriorInfo,
-                               out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
-    """
-    Compute the activations params for a given node in a graph according to a params function.
-    Args:
-        quant_cfg: quantization config.
-        node_activation_quant_cfg: node's activation quantization configuration.
-        node_prior_info: Prior info collected for the node that is being quantized.
-        out_stats_container: Tensor containing output statistics of the node.
-    Returns:
-        The computed activation quantization params.
-    """
-    activation_quantization_params_fn = _get_activation_quantization_params_fn(
-        node_activation_quant_cfg.activation_quantization_method, no_clipping=node_prior_info.is_output_bounded())
-    # Extract and filter histogram data from the statistics container.
-    z_threshold = quant_cfg.z_threshold
-    if node_activation_quant_cfg.z_threshold is not None:
-        z_threshold = node_activation_quant_cfg.z_threshold
-    bins_values, bins_counts = _get_histogram_data(out_stats_container,
-                                                   activation_error_method=quant_cfg.activation_error_method,
-                                                   z_threshold=z_threshold)
-    # Retrieve the minimum and maximum values from the statistics container.
-    min_value, max_value = out_stats_container.get_min_max_values()
-    # Determine if the activations should be considered signed.
-    signed = _determine_signedness(node_activation_quant_cfg, node_prior_info, min_value, bins_values, bins_counts)
-    # Compute and return the activation quantization parameters.
-    return activation_quantization_params_fn(
-        bins_values,
-        bins_counts,
-        quant_cfg.l_p_value,
-        node_activation_quant_cfg.activation_n_bits,
-        min_value,
-        max_value,
-        min_threshold=MIN_THRESHOLD,
-        quant_error_method=quant_cfg.activation_error_method,
-        is_signed=signed
-    )
-def _get_histogram_data(out_stats_container: BaseStatsCollector,
-                        activation_error_method: QuantizationErrorMethod,
-                        z_threshold: float) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
+def get_histogram_data(
+    activation_quant_cfg: NodeActivationQuantizationConfig,
+    out_stats_container: BaseStatsCollector
+) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
     """
     Extract and filter the histogram data from the statistics container.
     Args:
+        activation_quant_cfg: Node's activation quantization configuration.
         out_stats_container: Statistics container with histogram data.
-        activation_error_method: activation quantization error method.
-        z_threshold: z threshold for z-score filtering.
     Returns:
         A tuple containing the filtered bins_values and bins_counts.
     """
     bins_values, bins_counts = None, None
     # If the statistics container collected the histogram, we start by filtering outliers using z threshold
     # filtering, and then computing the threshold based on the filtered histogram.
     if out_stats_container.require_collection():
-        if activation_error_method == QuantizationErrorMethod.HMSE:
+        if activation_quant_cfg.activation_error_method == QuantizationErrorMethod.HMSE:
             bins_values, bins_counts = out_stats_container.weighted_hc.get_histogram()
         else:
             bins_values, bins_counts = out_stats_container.hc.get_histogram()
-        bins_counts = qpg.z_score_filter(
-            z_threshold,
+        bins_counts = quantization_params_generation.z_score_filter(
+            activation_quant_cfg.z_threshold,
             bins_values,
             bins_counts
         )
     return bins_values, bins_counts
-def _determine_signedness(
+def determine_signedness(
     activation_quant_cfg: NodeActivationQuantizationConfig,
     nodes_prior_info: NodePriorInfo,
     min_value: float,
@@ -134,37 +83,73 @@ def _determine_signedness(
     return np.any(bins_values[:-1][bins_counts > 0] < 0)
-_activation_quant_params_fns = {
-    QuantizationMethod.POWER_OF_TWO: qpg.power_of_two_selection_histogram,
-    QuantizationMethod.SYMMETRIC: qpg.symmetric_selection_histogram,
-    QuantizationMethod.UNIFORM: qpg.uniform_selection_histogram,
-    QuantizationMethod.LUT_POT_QUANTIZER: qpg.lut_kmeans_histogram
-}
-_activation_no_clipping_quant_params_fns = {
-    QuantizationMethod.POWER_OF_TWO: qpg.power_of_two_no_clipping_selection_min_max,
-    QuantizationMethod.SYMMETRIC: qpg.symmetric_no_clipping_selection_min_max,
-    QuantizationMethod.UNIFORM: qpg.uniform_no_clipping_selection_min_max,
-    QuantizationMethod.LUT_POT_QUANTIZER: qpg.lut_kmeans_histogram
-}
+def update_activation_quantization_params_fn(
+        activation_quant_cfg: NodeActivationQuantizationConfig,
+        nodes_prior_info: NodePriorInfo):
+    """
+    Update the activation quantization parameters function based on the quantization method
+    and whether the node's output is bounded.
-def _get_activation_quantization_params_fn(activation_quantization_method: QuantizationMethod,
-                                           no_clipping: bool) -> Callable:
+    Args:
+        activation_quant_cfg: Node's activation quantization configuration.
+        nodes_prior_info: Prior info collected for the node that is being quantized.
     """
-    Generate a function for finding activation quantization parameters.
+    if nodes_prior_info.is_output_bounded():
+        if activation_quant_cfg.activation_quantization_method == QuantizationMethod.POWER_OF_TWO:
+            activation_quant_cfg.set_activation_quantization_params_fn(
+                quantization_params_generation.power_of_two_no_clipping_selection_min_max
+            )
+        elif activation_quant_cfg.activation_quantization_method == QuantizationMethod.SYMMETRIC:
+            activation_quant_cfg.set_activation_quantization_params_fn(
+                quantization_params_generation.symmetric_no_clipping_selection_min_max
+            )
+        elif activation_quant_cfg.activation_quantization_method == QuantizationMethod.UNIFORM:
+            activation_quant_cfg.set_activation_quantization_params_fn(
+                quantization_params_generation.uniform_no_clipping_selection_min_max
+            )
+def get_activations_qparams(activation_quant_cfg: NodeActivationQuantizationConfig,
+                            nodes_prior_info: NodePriorInfo,
+                            out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
+    """
+    Compute the activations params for a given node in a graph according to a params function.
     Args:
-        activation_quantization_method: Which quantization method to use for activations.
-        no_clipping: Whether to use the no-clipping version of the quantizer (if available).
+        activation_quant_cfg: node's activation quantization configuration.
+        nodes_prior_info: Prior info collected for the node that is being quantized.
+        out_stats_container: Tensor containing output statistics of the node.
     Returns:
-        A function to find the quantization parameters.
+        The computed activation quantization params.
     """
-    if no_clipping:
-        params_fn = _activation_no_clipping_quant_params_fns.get(activation_quantization_method)
-    else:
-        params_fn = _activation_quant_params_fns.get(activation_quantization_method)
-    if params_fn is None:
-        raise ValueError(f"No parameter function found for the specified quantization method: "
-                         "{activation_quantization_method}")  # pragma: no cover
-    return params_fn
+    # Update quantization parameters function based on output bounds and quantization method.
+    update_activation_quantization_params_fn(activation_quant_cfg, nodes_prior_info)
+    # Extract and filter histogram data from the statistics container.
+    bins_values, bins_counts = get_histogram_data(activation_quant_cfg, out_stats_container)
+    # Retrieve the minimum and maximum values from the statistics container.
+    min_value, max_value = out_stats_container.get_min_max_values()
+    # Determine if the activations should be considered signed.
+    signed = determine_signedness(
+        activation_quant_cfg,
+        nodes_prior_info,
+        min_value,
+        bins_values,
+        bins_counts
+    )
+    # Compute and return the activation quantization parameters.
+    return activation_quant_cfg.activation_quantization_params_fn(
+        bins_values,
+        bins_counts,
+        activation_quant_cfg.l_p_value,
+        activation_quant_cfg.activation_n_bits,
+        min_value,
+        max_value,
+        min_threshold=activation_quant_cfg.min_threshold,
+        quant_error_method=activation_quant_cfg.activation_error_method,
+        is_signed=signed
+    )

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py CHANGED Viewed

@@ -18,21 +18,44 @@ from tqdm import tqdm
 from typing import List, Callable, Generator
 from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
-from model_compression_toolkit.core import QuantizationErrorMethod, QuantizationConfig
+from model_compression_toolkit.core import QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph, BaseNode
-from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
     HessianScoresGranularity
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_activations_computation \
-    import compute_activation_qparams
+    import get_activations_qparams
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_weights_computation import \
-    compute_weights_qparams
+    get_weights_qparams
 from model_compression_toolkit.logger import Logger
+def _collect_nodes_for_hmse(nodes_list: List[BaseNode], graph: Graph) -> List[BaseNode]:
+    """
+    Collects nodes that are compatiable for parameters selection search using HMSE,
+    that is, have a kernel attribute that is configured for HMSE error method.
+    Args:
+        nodes_list: A list of nodes to search quantization parameters for.
+        graph: Graph to compute its nodes' quantization parameters..
+    Returns: A (possibly empty) list of nodes.
+    """
+    hmse_nodes = []
+    for n in nodes_list:
+        kernel_attr_name = graph.fw_info.get_kernel_op_attributes(n.type)
+        kernel_attr_name = None if kernel_attr_name is None or len(kernel_attr_name) == 0 else kernel_attr_name[0]
+        if kernel_attr_name is not None and n.is_weights_quantization_enabled(kernel_attr_name) and \
+            all([c.weights_quantization_cfg.get_attr_config(kernel_attr_name).weights_error_method ==
+                 QuantizationErrorMethod.HMSE for c in n.candidates_quantization_cfg]):
+            hmse_nodes.append(n)
+    return hmse_nodes
 def calculate_quantization_params(graph: Graph,
-                                  quant_cfg: QuantizationConfig,
                                   fw_impl: FrameworkImplementation,
                                   repr_data_gen_fn: Callable[[], Generator],
                                   nodes: List[BaseNode] = None,
@@ -47,7 +70,6 @@ def calculate_quantization_params(graph: Graph,
     Args:
         graph: Graph to compute its nodes' thresholds.
-        quant_cfg: quantization config.
         fw_impl: FrameworkImplementation object.
         repr_data_gen_fn: callable returning representative dataset generator.
         nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph.
@@ -65,16 +87,15 @@ def calculate_quantization_params(graph: Graph,
     # Collecting nodes that are configured to search weights quantization parameters using HMSE optimization
     # and computing required Hessian information to be used for HMSE parameters selection.
     # The Hessian scores are computed and stored in the hessian_info_service object.
-    if quant_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
-        nodes_for_hmse = [n for n in nodes_list if n.kernel_attr and n.is_weights_quantization_enabled(n.kernel_attr)]
-        if nodes_for_hmse:
-            dataloader = fw_impl.convert_data_gen_to_dataloader(repr_data_gen_fn, batch_size=1)
-            request = HessianScoresRequest(mode=HessianMode.WEIGHTS,
-                                           granularity=HessianScoresGranularity.PER_ELEMENT,
-                                           data_loader=dataloader,
-                                           n_samples=num_hessian_samples,
-                                           target_nodes=nodes_for_hmse)
-            hessian_info_service.fetch_hessian(request)
+    nodes_for_hmse = _collect_nodes_for_hmse(nodes_list, graph)
+    if len(nodes_for_hmse) > 0:
+        dataloader = fw_impl.convert_data_gen_to_dataloader(repr_data_gen_fn, batch_size=1)
+        request = HessianScoresRequest(mode=HessianMode.WEIGHTS,
+                                       granularity=HessianScoresGranularity.PER_ELEMENT,
+                                       data_loader=dataloader,
+                                       n_samples=num_hessian_samples,
+                                       target_nodes=nodes_for_hmse)
+        hessian_info_service.fetch_hessian(request)
     for n in tqdm(nodes_list, "Calculating quantization parameters"):  # iterate only nodes that we should compute their thresholds
         for candidate_qc in n.candidates_quantization_cfg:
@@ -82,34 +103,43 @@ def calculate_quantization_params(graph: Graph,
                 if n.is_weights_quantization_enabled(attr):
                     # If the node's weights attribute should be quantized, we compute its quantization parameters
                     attr_cfg = candidate_qc.weights_quantization_cfg.get_attr_config(attr)
-                    output_channels_axis = attr_cfg.weights_channels_axis.output
+                    channels_axis = attr_cfg.weights_channels_axis
+                    if channels_axis is not None:
+                        output_channels_axis = channels_axis[0]
+                    else:
+                        output_channels_axis = None
-                    weights_error_method = quant_cfg.weights_error_method
-                    if weights_error_method == QuantizationErrorMethod.HMSE:
+                    mod_attr_cfg = attr_cfg
+                    if attr_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
                         # Although we collected nodes for HMSE before running the loop, we keep this verification to
                         # notify the user in case of HMSE configured for node that is not compatible for this method
-                        if n.kernel_attr is None or n.kernel_attr not in attr:
+                        kernel_attr_name = graph.fw_info.get_kernel_op_attributes(n.type)
+                        if len(kernel_attr_name) > 0:
+                            kernel_attr_name = kernel_attr_name[0]
+                        if kernel_attr_name is None or kernel_attr_name not in attr:
                             Logger.warning(f"The HMSE error method for parameters selection is only supported for "
                                            f"kernel weights attributes. Running parameters selection for attribute "
                                            f"'{attr}' in node '{n.name}' with the default MSE error method instead.")
-                            weights_error_method = QuantizationErrorMethod.MSE
-                    weights_params, output_channels_axis = compute_weights_qparams(n.get_weights_by_keys(attr),
-                                                                                   attr_cfg,
-                                                                                   weights_error_method,
-                                                                                   quant_cfg.l_p_value,
-                                                                                   output_channels_axis,
-                                                                                   node=n,
-                                                                                   hessian_info_service=hessian_info_service,
-                                                                                   num_hessian_samples=num_hessian_samples)
-                    attr_cfg.weights_channels_axis = ChannelAxisMapping(output_channels_axis, attr_cfg.weights_channels_axis.input)
+                            mod_attr_cfg = copy.deepcopy(attr_cfg)
+                            mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
+                    weights_params, output_channels_axis = get_weights_qparams(n.get_weights_by_keys(attr),
+                                                                               candidate_qc.weights_quantization_cfg,
+                                                                               mod_attr_cfg,
+                                                                               output_channels_axis,
+                                                                               node=n,
+                                                                               hessian_info_service=hessian_info_service,
+                                                                               num_hessian_samples=num_hessian_samples)
+                    attr_cfg.weights_channels_axis = (output_channels_axis, attr_cfg.weights_channels_axis[1])
                     attr_cfg.set_weights_quantization_param(weights_params)
-            if n.is_activation_quantization_enabled() or n.is_fln_quantization():
+            if n.is_activation_quantization_enabled():
                 # If node's activations should be quantized as well, we compute its activation quantization parameters
-                activation_params = compute_activation_qparams(quant_cfg=quant_cfg,
-                                                               node_activation_quant_cfg=candidate_qc.activation_quantization_cfg,
-                                                               node_prior_info=n.prior_info,
-                                                               out_stats_container=graph.get_out_stats_collector(n))
+                activation_params = get_activations_qparams(
+                    activation_quant_cfg=candidate_qc.activation_quantization_cfg,
+                    nodes_prior_info=n.prior_info,
+                    out_stats_container=graph.get_out_stats_collector(n))
                 # Create a NodeQuantizationConfig containing all quantization params and attach it to the node
                 candidate_qc.activation_quantization_cfg.set_activation_quantization_param(activation_params)

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py CHANGED Viewed

@@ -12,43 +12,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from functools import partial
-from typing import Dict, Any, Tuple, Callable, TYPE_CHECKING
+from typing import Dict, Any, Tuple
 import numpy as np
-from mct_quantizers import QuantizationMethod
-from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES, MIN_THRESHOLD
-from model_compression_toolkit.core import QuantizationErrorMethod
+from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
 from model_compression_toolkit.core.common.hessian import HessianInfoService
-from model_compression_toolkit.core.common.quantization.quantization_params_generation import \
-    power_of_two_selection_tensor, lut_kmeans_tensor, symmetric_selection_tensor, uniform_selection_tensor
+from model_compression_toolkit.defaultdict import DefaultDict
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
+    WeightsAttrQuantizationConfig
 from model_compression_toolkit.logger import Logger
-if TYPE_CHECKING:
-    from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig
-def compute_weights_qparams(weights_attr_data: np.ndarray,
-                            attr_quant_config: 'WeightsAttrQuantizationConfig',
-                            weights_error_method: QuantizationErrorMethod,
-                            l_p_value: int,
-                            output_channels_axis: int,
-                            min_threshold: float = MIN_THRESHOLD,
-                            node=None,
-                            hessian_info_service: HessianInfoService = None,
-                            num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Tuple[Dict[Any, Any], int]:
+def get_weights_qparams(weights_attr_values: np.ndarray,
+                        weights_quant_config: NodeWeightsQuantizationConfig,
+                        attr_quant_config: WeightsAttrQuantizationConfig,
+                        output_channels_axis: int,
+                        node=None,
+                        hessian_info_service: HessianInfoService = None,
+                        num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Tuple[Dict[Any, Any], int]:
     """
     Compute thresholds to quantize a kernel according to a NodeWeightsQuantizationConfig
     instance.
     Args:
-        weights_attr_data: Weights attribute parameter to compute the quantization thresholds for.
+        weights_attr_values: Weights attribute parameter to compute the quantization thresholds for.
+        weights_quant_config: Weights quantization configuration to define how the thresholds are computed.
         attr_quant_config: A specific weights attribute quantization configuration to get its params.
-        weights_error_method: quantization error method.
-        l_p_value: p-norm to use for the Lp-norm distance.
         output_channels_axis: Index of the kernel output channels dimension.
-        min_threshold: Minimal threshold to use if threshold is too small.
         node: The node for which the quantization error is computed (used only with HMSE error method).
         hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
         num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
@@ -57,43 +49,22 @@ def compute_weights_qparams(weights_attr_data: np.ndarray,
         A dictionary with the quantization threshold of the kernel.
         Selected quantization channel axis.
     """
-    params_fn = _get_weights_quantization_params_fn(attr_quant_config.weights_quantization_method)
-    weights_params, output_channels_axis = params_fn(
-        weights_attr_data,
-        p=l_p_value,
-        n_bits=attr_quant_config.weights_n_bits,
-        per_channel=attr_quant_config.weights_per_channel_threshold,
-        channel_axis=output_channels_axis,
-        min_threshold=min_threshold,
-        quant_error_method=weights_error_method,
-        node=node,
-        hessian_info_service=hessian_info_service,
-        num_hessian_samples=num_hessian_samples)
+    if attr_quant_config.weights_quantization_params_fn is not None:
+        weights_params, output_channels_axis = attr_quant_config.weights_quantization_params_fn(
+            weights_attr_values,
+            p=attr_quant_config.l_p_value,
+            n_bits=attr_quant_config.weights_n_bits,
+            per_channel=attr_quant_config.weights_per_channel_threshold,
+            channel_axis=output_channels_axis,
+            min_threshold=weights_quant_config.min_threshold,
+            quant_error_method=attr_quant_config.weights_error_method,
+            node=node,
+            hessian_info_service=hessian_info_service,
+            num_hessian_samples=num_hessian_samples)
+    else:  # pragma: no cover
+        Logger.error(f"Requested weights quantization parameters computation for node {node.name} without providing a "
+                     f"weights_quantization_params_fn."
+                     f"Returning an empty dictionary since no quantization parameters were computed.")
+        weights_params = {}
     return weights_params, output_channels_axis
-_weights_quant_params_fns = {
-    QuantizationMethod.POWER_OF_TWO: power_of_two_selection_tensor,
-    QuantizationMethod.SYMMETRIC: symmetric_selection_tensor,
-    QuantizationMethod.UNIFORM: uniform_selection_tensor,
-    QuantizationMethod.LUT_POT_QUANTIZER: partial(lut_kmeans_tensor, is_symmetric=False),
-    QuantizationMethod.LUT_SYM_QUANTIZER: partial(lut_kmeans_tensor, is_symmetric=True)
-}
-def _get_weights_quantization_params_fn(weights_quantization_method: QuantizationMethod) -> Callable:
-    """
-    Generate a function for finding weights quantization parameters.
-    Args:
-        weights_quantization_method: Which quantization method to use for weights.
-    Returns:
-        A function to find the quantization parameters.
-    """
-    params_fn = _weights_quant_params_fns.get(weights_quantization_method)
-    if not params_fn:
-        Logger.critical(
-            f"No parameter function found for the specified quantization method: {weights_quantization_method}")  # pragma: no cover
-    return params_fn

model_compression_toolkit/core/common/quantization/quantize_node.py CHANGED Viewed

@@ -12,7 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from model_compression_toolkit.core.common.quantization.quantization_fn_selection import get_weights_quantization_fn
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig
@@ -46,12 +47,11 @@ def get_quantized_weights_attr_by_qc(attr_name: str,
         output_channels_axis = None
     Logger.debug(f'quantizing layer {n.name} attribute {attr_name} with {weights_qc.weights_n_bits} bits')
-    weights_quantization_fn = get_weights_quantization_fn(weights_qc.weights_quantization_method)
-    quantized_kernel = weights_quantization_fn(n.get_weights_by_keys(attr_name),
-                                               n_bits=weights_qc.weights_n_bits,
-                                               signed=True,
-                                               quantization_params=weights_qc.weights_quantization_params,
-                                               per_channel=weights_qc.weights_per_channel_threshold,
-                                               output_channels_axis=output_channels_axis)
+    quantized_kernel = weights_qc.weights_quantization_fn(n.get_weights_by_keys(attr_name),
+                                                          n_bits=weights_qc.weights_n_bits,
+                                                          signed=True,
+                                                          quantization_params=weights_qc.weights_quantization_params,
+                                                          per_channel=weights_qc.weights_per_channel_threshold,
+                                                          output_channels_axis=output_channels_axis)
     return quantized_kernel, channels_axis

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl

mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl