PyPI - mct-nightly - Versions diffs - 2.4.0.20250705.556__py3-none-any.whl → 2.4.0.20250707.643__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250705.556__py3-none-any.whl → 2.4.0.20250707.643__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py CHANGED Viewed

@@ -18,21 +18,25 @@ from typing import Dict, Union, Optional, Tuple, Callable
 from mct_quantizers import QuantizationMethod
 import model_compression_toolkit.core.common.quantization.quantization_params_generation as qpg
+from model_compression_toolkit.constants import MIN_THRESHOLD
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import Signedness
 from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationErrorMethod
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationErrorMethod, \
+    QuantizationConfig
-def compute_activation_qparams(activation_quant_cfg: NodeActivationQuantizationConfig,
+def compute_activation_qparams(quant_cfg: QuantizationConfig,
+                               node_activation_quant_cfg: NodeActivationQuantizationConfig,
                                node_prior_info: NodePriorInfo,
                                out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
     """
     Compute the activations params for a given node in a graph according to a params function.
     Args:
-        activation_quant_cfg: node's activation quantization configuration.
+        quant_cfg: quantization config.
+        node_activation_quant_cfg: node's activation quantization configuration.
         node_prior_info: Prior info collected for the node that is being quantized.
         out_stats_container: Tensor containing output statistics of the node.
@@ -40,41 +44,43 @@ def compute_activation_qparams(activation_quant_cfg: NodeActivationQuantizationC
         The computed activation quantization params.
     """
     activation_quantization_params_fn = _get_activation_quantization_params_fn(
-        activation_quant_cfg.activation_quantization_method, no_clipping=node_prior_info.is_output_bounded())
+        node_activation_quant_cfg.activation_quantization_method, no_clipping=node_prior_info.is_output_bounded())
     # Extract and filter histogram data from the statistics container.
-    bins_values, bins_counts = _get_histogram_data(activation_quant_cfg, out_stats_container)
+    bins_values, bins_counts = _get_histogram_data(out_stats_container,
+                                                   activation_error_method=quant_cfg.activation_error_method,
+                                                   z_threshold=quant_cfg.z_threshold)
     # Retrieve the minimum and maximum values from the statistics container.
     min_value, max_value = out_stats_container.get_min_max_values()
     # Determine if the activations should be considered signed.
-    signed = _determine_signedness(activation_quant_cfg, node_prior_info, min_value, bins_values, bins_counts)
+    signed = _determine_signedness(node_activation_quant_cfg, node_prior_info, min_value, bins_values, bins_counts)
     # Compute and return the activation quantization parameters.
     return activation_quantization_params_fn(
         bins_values,
         bins_counts,
-        activation_quant_cfg.l_p_value,
-        activation_quant_cfg.activation_n_bits,
+        quant_cfg.l_p_value,
+        node_activation_quant_cfg.activation_n_bits,
         min_value,
         max_value,
-        min_threshold=activation_quant_cfg.min_threshold,
-        quant_error_method=activation_quant_cfg.activation_error_method,
+        min_threshold=MIN_THRESHOLD,
+        quant_error_method=quant_cfg.activation_error_method,
         is_signed=signed
     )
-def _get_histogram_data(
-    activation_quant_cfg: NodeActivationQuantizationConfig,
-    out_stats_container: BaseStatsCollector
-) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
+def _get_histogram_data(out_stats_container: BaseStatsCollector,
+                        activation_error_method: QuantizationErrorMethod,
+                        z_threshold: float) -> Tuple[Optional[np.ndarray], Optional[np.ndarray]]:
     """
     Extract and filter the histogram data from the statistics container.
     Args:
-        activation_quant_cfg: Node's activation quantization configuration.
         out_stats_container: Statistics container with histogram data.
+        activation_error_method: activation quantization error method.
+        z_threshold: z threshold for z-score filtering.
     Returns:
         A tuple containing the filtered bins_values and bins_counts.
@@ -83,12 +89,12 @@ def _get_histogram_data(
     # If the statistics container collected the histogram, we start by filtering outliers using z threshold
     # filtering, and then computing the threshold based on the filtered histogram.
     if out_stats_container.require_collection():
-        if activation_quant_cfg.activation_error_method == QuantizationErrorMethod.HMSE:
+        if activation_error_method == QuantizationErrorMethod.HMSE:
             bins_values, bins_counts = out_stats_container.weighted_hc.get_histogram()
         else:
             bins_values, bins_counts = out_stats_container.hc.get_histogram()
         bins_counts = qpg.z_score_filter(
-            activation_quant_cfg.z_threshold,
+            z_threshold,
             bins_values,
             bins_counts
         )

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py CHANGED Viewed

@@ -18,7 +18,7 @@ from tqdm import tqdm
 from typing import List, Callable, Generator
 from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
-from model_compression_toolkit.core import QuantizationErrorMethod
+from model_compression_toolkit.core import QuantizationErrorMethod, QuantizationConfig
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -31,29 +31,8 @@ from model_compression_toolkit.core.common.quantization.quantization_params_gene
 from model_compression_toolkit.logger import Logger
-def _collect_nodes_for_hmse(nodes_list: List[BaseNode], graph: Graph) -> List[BaseNode]:
-    """
-    Collects nodes that are compatiable for parameters selection search using HMSE,
-    that is, have a kernel attribute that is configured for HMSE error method.
-    Args:
-        nodes_list: A list of nodes to search quantization parameters for.
-        graph: Graph to compute its nodes' quantization parameters..
-    Returns: A (possibly empty) list of nodes.
-    """
-    hmse_nodes = []
-    for n in nodes_list:
-        if n.kernel_attr is not None and n.is_weights_quantization_enabled(n.kernel_attr) and \
-            all([c.weights_quantization_cfg.get_attr_config(n.kernel_attr).weights_error_method ==
-                 QuantizationErrorMethod.HMSE for c in n.candidates_quantization_cfg]):
-            hmse_nodes.append(n)
-    return hmse_nodes
 def calculate_quantization_params(graph: Graph,
+                                  quant_cfg: QuantizationConfig,
                                   fw_impl: FrameworkImplementation,
                                   repr_data_gen_fn: Callable[[], Generator],
                                   nodes: List[BaseNode] = None,
@@ -68,6 +47,7 @@ def calculate_quantization_params(graph: Graph,
     Args:
         graph: Graph to compute its nodes' thresholds.
+        quant_cfg: quantization config.
         fw_impl: FrameworkImplementation object.
         repr_data_gen_fn: callable returning representative dataset generator.
         nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph.
@@ -85,15 +65,16 @@ def calculate_quantization_params(graph: Graph,
     # Collecting nodes that are configured to search weights quantization parameters using HMSE optimization
     # and computing required Hessian information to be used for HMSE parameters selection.
     # The Hessian scores are computed and stored in the hessian_info_service object.
-    nodes_for_hmse = _collect_nodes_for_hmse(nodes_list, graph)
-    if len(nodes_for_hmse) > 0:
-        dataloader = fw_impl.convert_data_gen_to_dataloader(repr_data_gen_fn, batch_size=1)
-        request = HessianScoresRequest(mode=HessianMode.WEIGHTS,
-                                       granularity=HessianScoresGranularity.PER_ELEMENT,
-                                       data_loader=dataloader,
-                                       n_samples=num_hessian_samples,
-                                       target_nodes=nodes_for_hmse)
-        hessian_info_service.fetch_hessian(request)
+    if quant_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
+        nodes_for_hmse = [n for n in nodes_list if n.kernel_attr and n.is_weights_quantization_enabled(n.kernel_attr)]
+        if nodes_for_hmse:
+            dataloader = fw_impl.convert_data_gen_to_dataloader(repr_data_gen_fn, batch_size=1)
+            request = HessianScoresRequest(mode=HessianMode.WEIGHTS,
+                                           granularity=HessianScoresGranularity.PER_ELEMENT,
+                                           data_loader=dataloader,
+                                           n_samples=num_hessian_samples,
+                                           target_nodes=nodes_for_hmse)
+            hessian_info_service.fetch_hessian(request)
     for n in tqdm(nodes_list, "Calculating quantization parameters"):  # iterate only nodes that we should compute their thresholds
         for candidate_qc in n.candidates_quantization_cfg:
@@ -101,28 +82,24 @@ def calculate_quantization_params(graph: Graph,
                 if n.is_weights_quantization_enabled(attr):
                     # If the node's weights attribute should be quantized, we compute its quantization parameters
                     attr_cfg = candidate_qc.weights_quantization_cfg.get_attr_config(attr)
-                    channels_axis = attr_cfg.weights_channels_axis
-                    if channels_axis is not None:
-                        output_channels_axis = channels_axis[0]
-                    else:
-                        output_channels_axis = None
-                    mod_attr_cfg = attr_cfg
+                    output_channels_axis = attr_cfg.weights_channels_axis.output
-                    if attr_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
+                    weights_error_method = quant_cfg.weights_error_method
+                    if weights_error_method == QuantizationErrorMethod.HMSE:
                         # Although we collected nodes for HMSE before running the loop, we keep this verification to
                         # notify the user in case of HMSE configured for node that is not compatible for this method
                         if n.kernel_attr is None or n.kernel_attr not in attr:
                             Logger.warning(f"The HMSE error method for parameters selection is only supported for "
                                            f"kernel weights attributes. Running parameters selection for attribute "
                                            f"'{attr}' in node '{n.name}' with the default MSE error method instead.")
-                            mod_attr_cfg = copy.deepcopy(attr_cfg)
-                            mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
+                            weights_error_method = QuantizationErrorMethod.MSE
-                    min_threshold = candidate_qc.weights_quantization_cfg.min_threshold
                     weights_params, output_channels_axis = compute_weights_qparams(n.get_weights_by_keys(attr),
-                                                                                   mod_attr_cfg, output_channels_axis,
-                                                                                   min_threshold=min_threshold, node=n,
+                                                                                   attr_cfg,
+                                                                                   weights_error_method,
+                                                                                   quant_cfg.l_p_value,
+                                                                                   output_channels_axis,
+                                                                                   node=n,
                                                                                    hessian_info_service=hessian_info_service,
                                                                                    num_hessian_samples=num_hessian_samples)
                     attr_cfg.weights_channels_axis = ChannelAxisMapping(output_channels_axis, attr_cfg.weights_channels_axis.input)
@@ -130,8 +107,9 @@ def calculate_quantization_params(graph: Graph,
             if n.is_activation_quantization_enabled():
                 # If node's activations should be quantized as well, we compute its activation quantization parameters
-                activation_params = compute_activation_qparams(
-                    activation_quant_cfg=candidate_qc.activation_quantization_cfg, node_prior_info=n.prior_info,
-                    out_stats_container=graph.get_out_stats_collector(n))
+                activation_params = compute_activation_qparams(quant_cfg=quant_cfg,
+                                                               node_activation_quant_cfg=candidate_qc.activation_quantization_cfg,
+                                                               node_prior_info=n.prior_info,
+                                                               out_stats_container=graph.get_out_stats_collector(n))
                 # Create a NodeQuantizationConfig containing all quantization params and attach it to the node
                 candidate_qc.activation_quantization_cfg.set_activation_quantization_param(activation_params)

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py CHANGED Viewed

@@ -18,7 +18,8 @@ from typing import Dict, Any, Tuple, Callable, TYPE_CHECKING
 import numpy as np
 from mct_quantizers import QuantizationMethod
-from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES, MIN_THRESHOLD
+from model_compression_toolkit.core import QuantizationErrorMethod
 from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation import \
     power_of_two_selection_tensor, lut_kmeans_tensor, symmetric_selection_tensor, uniform_selection_tensor
@@ -28,10 +29,12 @@ if TYPE_CHECKING:
     from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig
-def compute_weights_qparams(weights_attr_values: np.ndarray,
+def compute_weights_qparams(weights_attr_data: np.ndarray,
                             attr_quant_config: 'WeightsAttrQuantizationConfig',
+                            weights_error_method: QuantizationErrorMethod,
+                            l_p_value: int,
                             output_channels_axis: int,
-                            min_threshold: float,
+                            min_threshold: float = MIN_THRESHOLD,
                             node=None,
                             hessian_info_service: HessianInfoService = None,
                             num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Tuple[Dict[Any, Any], int]:
@@ -40,8 +43,10 @@ def compute_weights_qparams(weights_attr_values: np.ndarray,
     instance.
     Args:
-        weights_attr_values: Weights attribute parameter to compute the quantization thresholds for.
+        weights_attr_data: Weights attribute parameter to compute the quantization thresholds for.
         attr_quant_config: A specific weights attribute quantization configuration to get its params.
+        weights_error_method: quantization error method.
+        l_p_value: p-norm to use for the Lp-norm distance.
         output_channels_axis: Index of the kernel output channels dimension.
         min_threshold: Minimal threshold to use if threshold is too small.
         node: The node for which the quantization error is computed (used only with HMSE error method).
@@ -54,13 +59,13 @@ def compute_weights_qparams(weights_attr_values: np.ndarray,
     """
     params_fn = _get_weights_quantization_params_fn(attr_quant_config.weights_quantization_method)
     weights_params, output_channels_axis = params_fn(
-        weights_attr_values,
-        p=attr_quant_config.l_p_value,
+        weights_attr_data,
+        p=l_p_value,
         n_bits=attr_quant_config.weights_n_bits,
         per_channel=attr_quant_config.weights_per_channel_threshold,
         channel_axis=output_channels_axis,
         min_threshold=min_threshold,
-        quant_error_method=attr_quant_config.weights_error_method,
+        quant_error_method=weights_error_method,
         node=node,
         hessian_info_service=hessian_info_service,
         num_hessian_samples=num_hessian_samples)

model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py CHANGED Viewed

@@ -14,8 +14,6 @@
 # ==============================================================================
 import copy
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
-from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig
@@ -23,7 +21,6 @@ from model_compression_toolkit.target_platform_capabilities.schema.mct_current_s
 def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
-                                   core_config: CoreConfig,
                                    fw_impl: FrameworkImplementation) -> Graph:
     """
     Get a graph, where each node has a final weights quantization configuration (with a bias
@@ -31,7 +28,6 @@ def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
     Args:
         graph_to_apply_bias_correction: Graph to apply bias correction to.
-        core_config: CoreConfig containing parameters of how the model should be quantized.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
     Returns:
@@ -40,20 +36,14 @@ def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
     graph = copy.deepcopy(graph_to_apply_bias_correction)
     for n in graph.nodes:
-        # bias correction is only relevant for nodes with kernel op
-        if core_config.quantization_config.weights_bias_correction and n.kernel_attr is not None and \
-            n.is_weights_quantization_enabled(n.kernel_attr) and \
-                not n.final_weights_quantization_cfg.weights_second_moment_correction:
-            # If a kernel was quantized and weights bias correction is enabled in n.quantization_cfg,
-            # a bias correction term was calculated during model preparation, and is used now in the node's bias term.
-            if n.final_weights_quantization_cfg.weights_bias_correction:
-                _apply_bias_correction_to_node(n, fw_impl, core_config.quantization_config)
+        if (n.final_weights_quantization_cfg and n.final_weights_quantization_cfg.bias_corrected is not None and
+                not n.final_weights_quantization_cfg.weights_second_moment_correction):
+            _apply_bias_correction_to_node(n, fw_impl)
     return graph
 def _apply_bias_correction_to_node(node: BaseNode,
-                                   fw_impl: FrameworkImplementation,
-                                   qc: QuantizationConfig):
+                                   fw_impl: FrameworkImplementation):
     """
     Set new bias to node using the bias-correction term that is stored in the
     final weights quantization configuration.

model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py CHANGED Viewed

@@ -52,7 +52,8 @@ def _collect_and_assign_act_threshold(graph: Graph,
     for n in graph.nodes:
         if n.is_activation_quantization_enabled():
-            activation_params = compute_activation_qparams(activation_quant_cfg=n.final_activation_quantization_cfg,
+            activation_params = compute_activation_qparams(quant_cfg=core_config.quantization_config,
+                                                           node_activation_quant_cfg=n.final_activation_quantization_cfg,
                                                            node_prior_info=n.prior_info,
                                                            out_stats_container=graph.get_out_stats_collector(n))
             n.final_activation_quantization_cfg.set_activation_quantization_param(activation_params)

model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py CHANGED Viewed

@@ -43,17 +43,9 @@ def compute_bias_correction_of_graph(graph: Graph,
     for n in graph.nodes:
         # Bias correction is computed based on the quantized kernel, so we need to get the specific kernel attribute
         # name out of all the weights attributes of the node.
-        if n.kernel_attr:
-            if n.is_weights_quantization_enabled(n.kernel_attr):
-                # Bias correction is not applied to layers with constant inputs.
-                if n.has_positional_weights:
-                    for candidate_qc in n.candidates_quantization_cfg:
-                        candidate_qc.weights_quantization_cfg.weights_bias_correction = False
-                else:
-                    _compute_bias_correction_per_candidate_qc(n,
-                                                              n.kernel_attr,
-                                                              graph.get_in_stats_collector(n),
-                                                              fw_impl=fw_impl)
+        if n.kernel_attr and n.is_weights_quantization_enabled(n.kernel_attr) and not n.has_positional_weights:
+            _compute_bias_correction_per_candidate_qc(n, n.kernel_attr, graph.get_in_stats_collector(n),
+                                                      fw_impl=fw_impl)
     return graph
@@ -74,8 +66,7 @@ def _compute_bias_correction_per_candidate_qc(node: BaseNode,
     """
     for candidate_qc in node.candidates_quantization_cfg:
-        if candidate_qc.weights_quantization_cfg.weights_bias_correction and not \
-                candidate_qc.weights_quantization_cfg.weights_second_moment_correction:
+        if not candidate_qc.weights_quantization_cfg.weights_second_moment_correction:
             quantized_kernel, io_channels_axes = get_quantized_weights_attr_by_qc(kernel_attr,
                                                                                   node,

model_compression_toolkit/core/common/statistics_correction/statistics_correction.py CHANGED Viewed

@@ -56,8 +56,9 @@ def statistics_correction_runner(transformed_graph: Graph,
     ########################################################
     # Compute bias correction to nodes' config candidates
     ########################################################
-    tg_with_bias = compute_bias_correction_of_graph(tg_with_bias,
-                                                    fw_impl)
+    if core_config.quantization_config.weights_bias_correction:
+        tg_with_bias = compute_bias_correction_of_graph(tg_with_bias,
+                                                        fw_impl)
     if tb_w is not None:
         tb_w.add_graph(tg_with_bias, 'statistics_computation')
@@ -96,7 +97,6 @@ def apply_statistics_correction(transformed_graph: Graph,
     #############################################
     if core_config.quantization_config.weights_bias_correction:
         transformed_graph = apply_bias_correction_to_graph(transformed_graph,
-                                                           core_config,
                                                            fw_impl=fw_impl)
     if tb_w is not None:
         tb_w.add_graph(transformed_graph, 'after_statistics_correction')

model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py CHANGED Viewed

@@ -20,7 +20,6 @@ from typing import Callable
 import numpy as np
 from model_compression_toolkit.core.common import Graph
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig, \
     ActivationQuantizationMode
@@ -84,14 +83,10 @@ class BatchNormalizationReconstruction(common.BaseSubstitution):
         # If the linear operator is part of a reused group (it is the "base" node, or a reused node),
         # we should skip the substitution.
         if source_node.is_reused():
-            for qc in source_node.candidates_quantization_cfg:
-                qc.weights_quantization_cfg.weights_second_moment_correction = False
             return graph
         # We apply only on nodes with folded BatchNormalization.
         if source_node.prior_info.std_output is None or source_node.prior_info.mean_output is None:
-            for qc in source_node.candidates_quantization_cfg:
-                qc.weights_quantization_cfg.weights_second_moment_correction = False
             return graph
         # This feature disabled for models with weights quantization method of Power of 2
@@ -103,10 +98,13 @@ class BatchNormalizationReconstruction(common.BaseSubstitution):
                     == QuantizationMethod.POWER_OF_TWO):
                 Logger.warning("Second moment statistics correction feature disabled for models with weights "
                                "quantization method of Power of 2")
-                for qc_inner in source_node.candidates_quantization_cfg:
-                    qc_inner.weights_quantization_cfg.weights_second_moment_correction = False
                 return graph
+        # turn on second moment correction flag
+        def set_second_moment_correction(qc):
+            qc.weights_quantization_cfg.weights_second_moment_correction = True
+        source_node.quantization_cfg.update_all(set_second_moment_correction)
         eps = self.epsilon_val
         original_gamma = source_node.prior_info.std_output

model_compression_toolkit/core/common/substitutions/shift_negative_activation.py CHANGED Viewed

@@ -298,7 +298,7 @@ def shift_negative_function(graph: Graph,
     negative_rate = np.abs(min_to_correct) / activation_threshold
-    enable_sub = negative_rate <= non_linear_node_cfg_candidate.shift_negative_ratio
+    enable_sub = negative_rate <= core_config.quantization_config.shift_negative_ratio
     if min_to_correct >= 0 or not enable_sub:
         return graph
@@ -316,7 +316,7 @@ def shift_negative_function(graph: Graph,
     if core_config.quantization_config.shift_negative_params_search:
         hist_bins, hist_count = graph.get_out_stats_collector(non_linear_node).hc.get_histogram()
-        hist_count = z_score_filter(non_linear_node_cfg_candidate.z_threshold,
+        hist_count = z_score_filter(core_config.quantization_config.z_threshold,
                                     hist_bins, hist_count)
         min_mse, _th, _shift = np.inf, None, None
@@ -471,10 +471,12 @@ def shift_negative_function(graph: Graph,
                                pad_node=pad_node,
                                op2d_node=op2d_node)
-    if non_linear_node_cfg_candidate.shift_negative_threshold_recalculation:
-        activation_param = compute_activation_qparams(activation_quant_cfg=non_linear_node_cfg_candidate,
+    if core_config.quantization_config.shift_negative_threshold_recalculation:
+        activation_param = compute_activation_qparams(quant_cfg=core_config.quantization_config,
+                                                      node_activation_quant_cfg=non_linear_node_cfg_candidate,
                                                       node_prior_info=non_linear_node.prior_info,
-                                                      out_stats_container=graph.get_out_stats_collector(non_linear_node))
+                                                      out_stats_container=graph.get_out_stats_collector(
+                                                          non_linear_node))
         assert activation_param.get(SIGNED) is False
         for candidate_qc in non_linear_node.candidates_quantization_cfg:

model_compression_toolkit/core/graph_prep_runner.py CHANGED Viewed

@@ -153,20 +153,10 @@ def get_finalized_graph(initial_graph: Graph,
     if bit_width_config:
         set_manual_bitwidth_config(graph, bit_width_config)
-    # TODO irena: load_fqc_configuration only loads config from tpc. Previously quant_config was read as well.
-    #  As a first stage we keep the attributes in internal configs and fill them manually from quant_config
-    #  not to break all the code at once. Eventually we need to handle quant_config directly, without injecting into candidates.
-    #  TODO 2: Also we adjust candidates for single precision, which we shouldn't do here.
-    def update(qc):
-        qc.activation_quantization_cfg.set_qc(quant_config)
-        qc.weights_quantization_cfg.set_qc(quant_config)
-        for attr_cfg in qc.weights_quantization_cfg.get_all_weight_attrs_configs().values():
-            attr_cfg.weights_error_method = quant_config.weights_error_method
-            attr_cfg.l_p_value = quant_config.l_p_value
+    # TODO irena: remove after base config is used
     for n in transformed_graph.nodes:
         if not mixed_precision_enable:
             n.quantization_cfg.candidates_quantization_cfg = [n.quantization_cfg.base_quantization_cfg]
-        n.quantization_cfg.update_all(update)
     ######################################
     # Channel equalization

model_compression_toolkit/core/keras/default_framework_info.py CHANGED Viewed

@@ -143,7 +143,7 @@ class KerasInfo(FrameworkInfo):
             Node's output channel axis.
         """
-        return cls.out_channel_axis_mapping.get(node_type, -1)
+        return cls.out_channel_axis_mapping.get(node_type)
 def set_keras_info(func):

model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py CHANGED Viewed

@@ -17,7 +17,7 @@
 from tensorflow.keras.layers import InputLayer, Dense, DepthwiseConv2D, Conv2D, Conv2DTranspose, ZeroPadding2D
 from typing import List
-from model_compression_toolkit.core import common
+from model_compression_toolkit.core import common, QuantizationConfig
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher, WalkMatcher
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
@@ -47,7 +47,8 @@ class BaseInputScaling(common.BaseSubstitution):
     """
     def __init__(self,
-                 matcher_instance):
+                 matcher_instance,
+                 quant_cfg: QuantizationConfig):
         """
         Matches: InputLayer -> (optional nodes) -> (Dense,Conv2D,DepthwiseConv2D,Conv2DTranspose)
         note: the optional nodes are nodes that don't affect the scaling (such as ZeroPadding)
@@ -55,10 +56,11 @@ class BaseInputScaling(common.BaseSubstitution):
         Create a substitution using different params which may affect the way this substitution is made.
         The substitution is looking for edges in the graph which are input layers connected to linear layers.
         Args:
-            matcher_instance: matcher instance of type WalkMatcher
+            matcher_instance: matcher instance of type WalkMatcher.
+            quant_cfg: quantization config.
         """
         super().__init__(matcher_instance=matcher_instance)
+        self.quant_cfg = quant_cfg
     def substitute(self,
                    graph: Graph,
@@ -105,9 +107,11 @@ class BaseInputScaling(common.BaseSubstitution):
             for nqc in linear_layer.candidates_quantization_cfg:
                 attr_cfg = nqc.weights_quantization_cfg.get_attr_config(linear_layer.kernel_attr)
                 assert attr_cfg.enable_weights_quantization
-                w_params, _ = compute_weights_qparams(w1_fixed, attr_quant_config=attr_cfg,
-                                                      output_channels_axis=attr_cfg.weights_channels_axis.output,
-                                                      min_threshold=nqc.weights_quantization_cfg.min_threshold)
+                w_params, _ = compute_weights_qparams(w1_fixed,
+                                                      attr_quant_config=attr_cfg,
+                                                      weights_error_method=self.quant_cfg.weights_error_method,
+                                                      l_p_value=self.quant_cfg.l_p_value,
+                                                      output_channels_axis=attr_cfg.weights_channels_axis.output)
                 attr_cfg.set_weights_quantization_param(w_params)
         return graph
@@ -118,12 +122,15 @@ class InputScaling(BaseInputScaling):
     Substitution extends BaseInputScaling to the case of Input-->Linear
     """
-    def __init__(self):
+    def __init__(self, quant_cfg: QuantizationConfig):
         """
         Initialize a ScaleEqualization object.
+        Args:
+            quant_cfg: quantization config.
         """
-        super().__init__(matcher_instance=INPUT_MATCHER)
+        super().__init__(matcher_instance=INPUT_MATCHER, quant_cfg=quant_cfg)
 class InputScalingWithPad(BaseInputScaling):
@@ -131,9 +138,12 @@ class InputScalingWithPad(BaseInputScaling):
     Substitution extends BaseInputScaling to the case of Input-->ZeroPadding-->Linear
     """
-    def __init__(self):
+    def __init__(self, quant_cfg: QuantizationConfig):
         """
         Initialize a ScaleEqualization object.
+        Args:
+            quant_cfg: quantization config.
         """
-        super().__init__(matcher_instance=INPUT_MATCHER_WITH_PAD)
+        super().__init__(matcher_instance=INPUT_MATCHER_WITH_PAD, quant_cfg=quant_cfg)

model_compression_toolkit/core/keras/keras_implementation.py CHANGED Viewed

@@ -357,8 +357,8 @@ class KerasImplementation(FrameworkImplementation):
         if quant_config.softmax_shift:
             substitutions_list.append(keras_softmax_shift())
         if quant_config.input_scaling:
-            substitutions_list.append(InputScaling())
-            substitutions_list.append(InputScalingWithPad())
+            substitutions_list.append(InputScaling(quant_config))
+            substitutions_list.append(InputScalingWithPad(quant_config))
         if quant_config.concat_threshold_update:
             substitutions_list.append(ConcatThresholdUpdate())
         return substitutions_list

model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py CHANGED Viewed

@@ -28,6 +28,10 @@ import numpy as np
 from model_compression_toolkit.logger import Logger
+# default output channel axis to use when it's not defined in node's fw_info.
+_default_output_channel_axis = -1
 class PruningKerasImplementation(KerasImplementation, PruningFrameworkImplementation):
     """
     Implementation of the PruningFramework for the Keras framework. This class provides
@@ -172,6 +176,10 @@ class PruningKerasImplementation(KerasImplementation, PruningFrameworkImplementa
         return attributes_with_axis
+    @property
+    def default_output_channel_axis(self):
+        return _default_output_channel_axis
 def _is_keras_node_pruning_section_edge(node: BaseNode) -> bool:
     """

model_compression_toolkit/core/pytorch/default_framework_info.py CHANGED Viewed

@@ -101,7 +101,7 @@ class PyTorchInfo(FrameworkInfo):
             Node's output channel axis.
         """
-        return cls.out_channel_axis_mapping.get(node_type, 1)
+        return cls.out_channel_axis_mapping.get(node_type)
 def set_pytorch_info(func):

mct-nightly 2.4.0.20250705.556__py3-none-any.whl → 2.4.0.20250707.643__py3-none-any.whl

mct-nightly 2.4.0.20250705.556__py3-none-any.whl → 2.4.0.20250707.643__py3-none-any.whl