mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl
This diff compares the contents of publicly available package versions that were released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/METADATA +6 -3
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/RECORD +165 -159
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/analyzer.py +5 -2
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +4 -0
- model_compression_toolkit/core/common/collectors/base_collector.py +1 -4
- model_compression_toolkit/core/common/collectors/mean_collector.py +4 -7
- model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +4 -7
- model_compression_toolkit/core/common/framework_implementation.py +22 -10
- model_compression_toolkit/core/common/framework_info.py +83 -93
- model_compression_toolkit/core/common/fusion/graph_fuser.py +9 -12
- model_compression_toolkit/core/common/graph/base_graph.py +72 -45
- model_compression_toolkit/core/common/graph/base_node.py +141 -121
- model_compression_toolkit/core/common/graph/functional_node.py +2 -19
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +21 -17
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +18 -8
- model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +9 -14
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +21 -12
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +3 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +6 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +10 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +9 -4
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +7 -2
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -7
- model_compression_toolkit/core/common/model_collector.py +18 -22
- model_compression_toolkit/core/common/model_validation.py +44 -0
- model_compression_toolkit/core/common/network_editors/__init__.py +1 -8
- model_compression_toolkit/core/common/network_editors/actions.py +130 -14
- model_compression_toolkit/core/common/network_editors/edit_network.py +4 -1
- model_compression_toolkit/core/common/pruning/channels_grouping.py +5 -1
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +6 -0
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +15 -5
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +7 -3
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +4 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +13 -5
- model_compression_toolkit/core/common/pruning/prune_graph.py +4 -1
- model_compression_toolkit/core/common/pruning/pruner.py +6 -1
- model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +13 -5
- model_compression_toolkit/core/common/pruning/pruning_section.py +18 -9
- model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +55 -116
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +14 -20
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +228 -43
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -0
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -21
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +78 -0
- model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +5 -8
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -91
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +66 -36
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +32 -61
- model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +412 -93
- model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +7 -3
- model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +19 -6
- model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +19 -11
- model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +15 -15
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +20 -4
- model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +9 -4
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +12 -8
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +6 -3
- model_compression_toolkit/core/common/substitutions/scale_equalization.py +21 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +55 -43
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +3 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/visualization/nn_visualizer.py +8 -3
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +12 -8
- model_compression_toolkit/core/graph_prep_runner.py +35 -22
- model_compression_toolkit/core/keras/back2framework/float_model_builder.py +4 -0
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -0
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +15 -8
- model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/keras/default_framework_info.py +91 -131
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +18 -29
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +16 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +5 -4
- model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +13 -3
- model_compression_toolkit/core/keras/keras_implementation.py +37 -17
- model_compression_toolkit/core/keras/keras_model_validation.py +38 -0
- model_compression_toolkit/core/keras/keras_node_prior_info.py +13 -4
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +34 -19
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +12 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +16 -9
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +5 -1
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/pytorch/default_framework_info.py +79 -93
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +4 -3
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +8 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +4 -3
- model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +12 -3
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +41 -24
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +33 -13
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +5 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/quantization_prep_runner.py +11 -6
- model_compression_toolkit/core/runner.py +15 -5
- model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
- model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +1 -0
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +9 -13
- model_compression_toolkit/gptq/common/gptq_graph.py +11 -5
- model_compression_toolkit/gptq/common/gptq_training.py +8 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +9 -3
- model_compression_toolkit/gptq/keras/graph_info.py +6 -4
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -4
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +9 -3
- model_compression_toolkit/gptq/pytorch/graph_info.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +7 -5
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/runner.py +7 -1
- model_compression_toolkit/pruning/keras/pruning_facade.py +12 -7
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +8 -4
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -5
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -4
- model_compression_toolkit/ptq/runner.py +4 -1
- model_compression_toolkit/qat/common/qat_config.py +6 -2
- model_compression_toolkit/qat/keras/quantization_facade.py +13 -7
- model_compression_toolkit/qat/pytorch/quantization_facade.py +11 -7
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py +3 -3
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +2 -0
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +6 -0
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +4 -2
- model_compression_toolkit/xquant/__init__.py +1 -0
- model_compression_toolkit/xquant/common/constants.py +1 -0
- model_compression_toolkit/xquant/common/model_folding_utils.py +6 -1
- model_compression_toolkit/xquant/common/tensorboard_utils.py +4 -1
- model_compression_toolkit/xquant/common/xquant_config.py +27 -1
- model_compression_toolkit/xquant/{common → keras}/core_report_generator.py +2 -2
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +1 -1
- model_compression_toolkit/xquant/{common → keras}/framework_report_utils.py +23 -2
- model_compression_toolkit/xquant/keras/keras_report_utils.py +10 -5
- model_compression_toolkit/xquant/keras/similarity_calculator.py +199 -0
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +3 -0
- model_compression_toolkit/xquant/pytorch/core_detect_degrade_layer.py +77 -0
- model_compression_toolkit/xquant/pytorch/core_judge_troubleshoot.py +66 -0
- model_compression_toolkit/xquant/pytorch/core_report_generator.py +177 -0
- model_compression_toolkit/xquant/pytorch/detect_degrade_utils.py +78 -0
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +41 -1
- model_compression_toolkit/xquant/pytorch/framework_report_utils.py +98 -0
- model_compression_toolkit/xquant/pytorch/judge_troubleshoot_utils.py +562 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +10 -7
- model_compression_toolkit/xquant/{common → pytorch}/similarity_calculator.py +6 -1
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +3 -0
- model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +0 -47
- model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +0 -45
- model_compression_toolkit/quantization_preparation/__init__.py +0 -14
- model_compression_toolkit/quantization_preparation/load_fqc.py +0 -223
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/lut_fake_quant.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/lut_fake_quant.py +0 -0
model_compression_toolkit/core/common/substitutions/shift_negative_activation.py
CHANGED
@@ -16,20 +16,21 @@ import copy
 import numpy as np
 from typing import List, Tuple, Any, Callable
 
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig, \
     ActivationQuantizationMode
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common import FrameworkInfo, Graph, BaseNode
 from model_compression_toolkit.constants import THRESHOLD, SIGNED, SHIFT_NEGATIVE_NON_LINEAR_NUM_BITS
 from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core.common.quantization.set_node_quantization_config import create_node_activation_qc, \
+    set_quantization_configs_to_node
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_activations_computation \
-    import
+    import get_activations_qparams
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.error_functions import \
     _mse_error_histogram
 from model_compression_toolkit.core.common.quantization.quantization_params_generation import z_score_filter
-from model_compression_toolkit.quantization_preparation.load_fqc import set_quantization_configs_to_node, \
-    fetch_qc_options_for_node
 from model_compression_toolkit.target_platform_capabilities import QuantizationMethod, AttributeQuantizationConfig
 
 """
@@ -45,6 +46,7 @@ If the linear node pads the input tensor with zeros, we modify the padded value
 
 def op2d_bias_correction(op2d_node: BaseNode,
                          shift_to_correct: float,
+                         fw_info: FrameworkInfo,
                          bias_str: str,
                          bias_flag_str: str):
     """
@@ -55,6 +57,7 @@ def op2d_bias_correction(op2d_node: BaseNode,
         op2d_node: Node to compute its bias correction term.
         shift_to_correct: Value that was used to shift the output tensor of
             the non-linear node.
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
        bias_str:
        bias_flag_str: The framework specific attribute name of the bias flag.
    """
@@ -66,19 +69,21 @@ def op2d_bias_correction(op2d_node: BaseNode,
     # Add an attribute quantization configuration to the newly added bias attribute, with disabled quantization
     for qc in op2d_node.candidates_quantization_cfg:
         qc.weights_quantization_cfg.set_attr_config(bias_flag_str,
-                                                    WeightsAttrQuantizationConfig(
+                                                    WeightsAttrQuantizationConfig(QuantizationConfig(),
+                                                                                  AttributeQuantizationConfig(
                                                         enable_weights_quantization=False)))
 
     # Each node adds a different noise due to the shifting. It depends on the
     # dimensions of the kernel, thus the correction term is a function of
     # the layer type.
-    kernel = op2d_node.get_weights_by_keys(op2d_node.
+    kernel = op2d_node.get_weights_by_keys(fw_info.kernel_ops_attributes_mapping.get(op2d_node.type)[0])
     if kernel is not None:
+        output_channel_index, input_channel_index = fw_info.kernel_channels_mapping.get(op2d_node.type)
         axis_not_output_channel = list(range(len(kernel.shape)))
-        axis_not_output_channel.remove(
+        axis_not_output_channel.remove(output_channel_index)
 
         # special case of depthwise_conv2d in tensorflow, where we have a depth multiplier for the filters
-        if
+        if output_channel_index == input_channel_index:
             axis_not_output_channel.remove(3)  # 3 is the depth multiplier index
 
         bias_correction = shift_to_correct * np.sum(kernel, axis=tuple(axis_not_output_channel))
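Note: the correction term restored above is the shift value multiplied by the kernel summed over every axis except the output-channel axis. A minimal numpy sketch of that computation, assuming a Keras-style (H, W, C_in, C_out) kernel layout and an illustrative channel index (in MCT both come from fw_info, not hard-coded values):

    import numpy as np

    kernel = np.random.randn(3, 3, 8, 16)  # assumed (H, W, C_in, C_out) layout
    shift_to_correct = 0.25
    output_channel_index = 3  # illustrative; MCT reads this from fw_info.kernel_channels_mapping

    # Sum over all axes except the output channel: one correction value per output channel.
    axis_not_output_channel = list(range(kernel.ndim))
    axis_not_output_channel.remove(output_channel_index)
    bias_correction = shift_to_correct * np.sum(kernel, axis=tuple(axis_not_output_channel))
    assert bias_correction.shape == (16,)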
@@ -245,13 +250,13 @@ def shift_negative_function(graph: Graph,
                             core_config: CoreConfig,
                             non_linear_node: BaseNode,
                             op2d_node: BaseNode,
+                            fw_info: FrameworkInfo,
                             create_add_node: Callable,
                             get_padding_values: Callable,
                             create_pad_node: Callable,
                             padding_str: str,
                             bias_str: str,
                             bias_flag_str: str,
-                            get_activation_quantization_fn_factory: Callable,
                             zero_padding_node: BaseNode = None,
                             bypass_nodes: List = None,
                             params_search_quantization_fn: Callable = None
@@ -271,13 +276,14 @@ def shift_negative_function(graph: Graph,
         non_linear_node: Non-linear node with negative values to shift.
         op2d_node: Linear node to correct its bias to overcome the expected error due to
             the shifting.
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.)
         create_add_node: Function to create an add node.
         get_padding_values: Function to compute the op2d node's padding values
         create_pad_node: Function to create an pad node.
         padding_str: The framework specific attribute name of the padding.
         bias_str: The framework specific attribute name of the bias.
         bias_flag_str: The framework specific attribute name of the bias flag.
-        get_activation_quantization_fn_factory: activation quantization functions factory.
         zero_padding_node: ZeroPadding2D node that may be in the graph before the linear layer.
         params_search_quantization_fn: Function to quantize np tensor using a framework (tf/torch) quantization method. Needed for better param_search estimating the expected loss.
 
@@ -293,12 +299,13 @@ def shift_negative_function(graph: Graph,
     # all candidates have same activation config, so taking the first candidate for calculations
     non_linear_node_cfg_candidate = non_linear_node.candidates_quantization_cfg[0].activation_quantization_cfg
 
+
     # get the non-linear activation threshold
     activation_threshold = non_linear_node_cfg_candidate.activation_quantization_params.get(THRESHOLD)
 
     negative_rate = np.abs(min_to_correct) / activation_threshold
 
-    enable_sub = negative_rate <=
+    enable_sub = negative_rate <= non_linear_node_cfg_candidate.shift_negative_ratio
     if min_to_correct >= 0 or not enable_sub:
         return graph
 
@@ -316,7 +323,7 @@ def shift_negative_function(graph: Graph,
     if core_config.quantization_config.shift_negative_params_search:
 
         hist_bins, hist_count = graph.get_out_stats_collector(non_linear_node).hc.get_histogram()
-        hist_count = z_score_filter(
+        hist_count = z_score_filter(non_linear_node_cfg_candidate.z_threshold,
                                     hist_bins, hist_count)
 
         min_mse, _th, _shift = np.inf, None, None
@@ -327,15 +334,13 @@ def shift_negative_function(graph: Graph,
             'float32')  # Change to type float32 to support tensorflow dtypes
         for _shift_value in _q_points:
             _hist_bins = hist_bins.astype(np.float32) + _shift_value
-
-                non_linear_node_cfg_candidate.activation_quantization_method)
-            fw_quant_fn = quantizer_factory(non_linear_node_cfg_candidate.activation_n_bits, qparams)
+            fw_quant_fn = non_linear_node_cfg_candidate.activation_quantization_fn(non_linear_node_cfg_candidate.activation_n_bits, qparams)
             """
             In SNC, when better shifting values are tested for better choice,
             the histogram (which is a numpy object) is quantized using the non-linear node activation
             quantization function (to estimate the expected mse comparing to the original histogram).
             The quantization function is a framework function, which makes it fail since it
-            expects a fw tensor. The
+            expects a fw tensor. The commmon part of SNC receives an argument which is a callable
             that receives two argument and returns one: it gets the fw activation quantization function
             and the bins to quantize. The function (of each fw) responsible for doing (if needed) a preprocessing and postprocessing
             to the bins which is a numpy object.
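Note: the docstring above specifies the contract of params_search_quantization_fn: a callable that takes the framework activation quantization function and the numpy histogram bins, and returns the quantized bins as numpy. A sketch of what such a callable could look like on the TensorFlow side (the function name is hypothetical; only the contract is taken from the docstring):

    import numpy as np
    import tensorflow as tf

    def tf_params_search_quantization_fn(fw_quant_fn, bins: np.ndarray) -> np.ndarray:
        tf_bins = tf.convert_to_tensor(bins, dtype=tf.float32)  # preprocessing: numpy -> fw tensor
        quantized = fw_quant_fn(tf_bins)                        # framework activation quantization
        return quantized.numpy()                                # postprocessing: fw tensor -> numpy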
@@ -385,6 +390,7 @@ def shift_negative_function(graph: Graph,
                                                first_node=non_linear_node)
         op2d_bias_correction(op2d_node,
                              shift_value,
+                             fw_info,
                              bias_str,
                              bias_flag_str)
 
@@ -395,9 +401,12 @@ def shift_negative_function(graph: Graph,
     graph.set_out_stats_collector_to_node(add_node, add_node_stats_collector)
     graph.shift_stats_collector(add_node, np.array(shift_value))
 
-    set_quantization_configs_to_node(
+    set_quantization_configs_to_node(fw_info=fw_info,
+                                     node=add_node,
                                      graph=graph,
-
+                                     quant_config=core_config.quantization_config,
+                                     fqc=graph.fqc,
+                                     mixed_precision_enable=core_config.is_mixed_precision_enabled)
 
     update_fused_op_with_add(graph=graph,
                              non_linear_node=non_linear_node,
@@ -419,9 +428,12 @@ def shift_negative_function(graph: Graph,
                                                last_node=op2d_node)
 
         # Set quantization configuration to node, even though we do not quantize it:
-        set_quantization_configs_to_node(
+        set_quantization_configs_to_node(fw_info=fw_info,
+                                         node=pad_node,
                                          graph=graph,
-
+                                         quant_config=core_config.quantization_config,
+                                         fqc=graph.fqc,
+                                         mixed_precision_enable=core_config.is_mixed_precision_enabled)
 
         for candidate_qc in pad_node.candidates_quantization_cfg:
             candidate_qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.NO_QUANT
@@ -446,7 +458,7 @@ def shift_negative_function(graph: Graph,
             bypass_candidate_qc.activation_quantization_cfg.activation_quantization_params[SIGNED] = False
             graph.shift_stats_collector(bypass_node, np.array(shift_value))
 
-    add_node_qco =
+    add_node_qco = add_node.get_qco(graph.fqc).quantization_configurations
     add_supported_bitwidths = [c.activation_n_bits for c in add_node_qco]
     if original_non_linear_activation_nbits not in add_supported_bitwidths:
         raise ValueError(
@@ -454,16 +466,19 @@ def shift_negative_function(graph: Graph,
             f"bitwidth is {original_non_linear_activation_nbits}. Consider adapting the TPC so 'Add' will support the "
             f"same bitwidth as {non_linear_node.type} or disable shift negative correction.")
 
-
-
-
+    for op_qc_idx, candidate_qc in enumerate(add_node.candidates_quantization_cfg):
+        for attr in add_node.get_node_weights_attributes():
+            # TODO: do we not quantize the weights of this 'add' on purpose?
+            candidate_qc.weights_quantization_cfg.get_attr_config(attr).enable_weights_quantization = False
+
+        candidate_qc.activation_quantization_cfg = create_node_activation_qc(core_config.quantization_config,
+                                                                             fw_info,
+                                                                             add_node_qco[op_qc_idx])
 
-
-
-        c.activation_quantization_cfg.set_activation_quantization_param({THRESHOLD: activation_threshold,
-                                                                         SIGNED: False})
+        candidate_qc.activation_quantization_cfg.set_activation_quantization_param({THRESHOLD: activation_threshold,
+                                                                                    SIGNED: False})
 
-
+        candidate_qc.activation_quantization_cfg.activation_n_bits = original_non_linear_activation_nbits
 
     # Add the new padding node to a fused op with the op2d.
     if pad_node:
@@ -471,14 +486,12 @@ def shift_negative_function(graph: Graph,
                                      pad_node=pad_node,
                                      op2d_node=op2d_node)
 
-    if
-        activation_param =
-
-
-                                                   out_stats_container=graph.get_out_stats_collector(
-                                                       non_linear_node))
+    if non_linear_node_cfg_candidate.shift_negative_threshold_recalculation:
+        activation_param = get_activations_qparams(activation_quant_cfg=non_linear_node_cfg_candidate,
+                                                   nodes_prior_info=non_linear_node.prior_info,
+                                                   out_stats_container=graph.get_out_stats_collector(non_linear_node))
 
-        assert activation_param.get(SIGNED)
+        assert activation_param.get(SIGNED) == False
         for candidate_qc in non_linear_node.candidates_quantization_cfg:
             candidate_qc.activation_quantization_cfg.set_activation_quantization_param(activation_param)
 
@@ -560,6 +573,7 @@ def get_next_nodes_to_correct(n: BaseNode,
 
 
 def apply_shift_negative_correction(graph: Graph,
                                     core_config: CoreConfig,
+                                    fw_info: FrameworkInfo,
                                     snc_node_types: NodeOperationMatcher,
                                     linear_node_types: NodeOperationMatcher,
                                     bypass_node_types: NodeOperationMatcher,
@@ -571,7 +585,6 @@ def apply_shift_negative_correction(graph: Graph,
                                     padding_str: str,
                                     bias_str: str,
                                     bias_flag_str: str,
-                                    get_activation_quantization_fn_factory: Callable,
                                     params_search_quantization_fn: Callable=None) -> Graph:
     """
     Apply the substitution even if the linear node is not immediately after
@@ -580,6 +593,7 @@ def apply_shift_negative_correction(graph: Graph,
     Args:
         graph: Graph to apply the substitution on.
         core_config: Quantization configuration to build the substitutions list according to.
+        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
             groups of layers by how they should be quantized, etc.)
         snc_node_types: Types of activation nodes with negative outputs to consider.
         linear_node_types: Types of linear nodes to consider.
@@ -593,9 +607,6 @@ def apply_shift_negative_correction(graph: Graph,
         padding_str: The framework specific attribute name of the padding.
         bias_str: The framework specific attribute name of the bias.
         bias_flag_str: The framework specific attribute name of the bias flag.
-        get_activation_quantization_fn_factory: activation quantization functions factory.
-        params_search_quantization_fn: Function to quantize np tensor using a framework (tf/torch) quantization method. Needed for better param_search estimating the expected loss.
-
     Returns:
         Graph after applying shift negative on selected activations.
     """
@@ -603,8 +614,9 @@ def apply_shift_negative_correction(graph: Graph,
     nodes = list(graph.nodes())
     for n in nodes:
         # Skip substitution if QuantizationMethod is uniform.
-
-
+        node_qco = n.get_qco(graph.fqc)
+        if any([op_qc.activation_quantization_method is QuantizationMethod.UNIFORM
+                for op_qc in node_qco.quantization_configurations]):
             continue
 
         if snc_node_types.apply(n):
@@ -620,13 +632,13 @@ def apply_shift_negative_correction(graph: Graph,
                                            core_config,
                                            n,
                                            linear_node,
+                                           fw_info,
                                            create_add_node,
                                            get_padding_values,
                                            create_pad_node,
                                            padding_str,
                                            bias_str,
                                            bias_flag_str,
-                                           get_activation_quantization_fn_factory,
                                            zero_padding_node=pad_node,
                                            bypass_nodes=bypass_nodes,
                                            params_search_quantization_fn=params_search_quantization_fn)
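Note: the uniform-quantization skip added at the top of the node loop reads in isolation; a condensed sketch using the same attribute names as the hunk above (the helper name is ours, not MCT's):

    from model_compression_toolkit.target_platform_capabilities import QuantizationMethod

    def _uses_uniform_activation_quantization(node, fqc) -> bool:
        # A node is excluded from shift-negative correction if any of its candidate
        # configurations quantizes the activation with the UNIFORM method.
        node_qco = node.get_qco(fqc)
        return any(op_qc.activation_quantization_method is QuantizationMethod.UNIFORM
                   for op_qc in node_qco.quantization_configurations)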
model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py
CHANGED
@@ -50,7 +50,9 @@ class BaseVirtualActivationWeightsComposition(BaseSubstitution):
             return graph
 
         # Virtual composed activation-weights node
-        v_node = VirtualActivationWeightsNode(act_node,
+        v_node = VirtualActivationWeightsNode(act_node,
+                                              weights_node,
+                                              fw_info=graph.fw_info)
 
         # Update graph
         graph.add_node(v_node)
model_compression_toolkit/core/common/substitutions/weights_activation_split.py
CHANGED
@@ -50,7 +50,7 @@ class BaseWeightsActivationSplit(BaseSubstitution):
             Graph after applying the substitution.
         """
         # The decomposition works on linear nodes, that is, nodes with kernel ops
-        kernel_attr = node.
+        kernel_attr = graph.fw_info.get_kernel_op_attributes(node.type)[0]
         if kernel_attr is None:
             Logger.critical(f"Trying to split node weights and activation, but node "
                             f"{node.name} doesn't have a kernel attribute.")
model_compression_toolkit/core/common/visualization/nn_visualizer.py
CHANGED
@@ -59,19 +59,22 @@ class NNVisualizer:
     def __init__(self,
                  graph_float: Graph,
                  graph_quantized: Graph,
-                 fw_impl: FrameworkImplementation
+                 fw_impl: FrameworkImplementation,
+                 fw_info: FrameworkInfo):
         """
         Initialize a NNVisualizer object.
         Args:
             graph_float: Float version of the graph.
             graph_quantized: Quantized version of the graph.
             fw_impl: Framework implementation with framework-specific methods implementation.
+            fw_info: Framework info with framework-specific information.
 
         """
 
         self.graph_float = graph_float
         self.graph_quantized = graph_quantized
         self.fw_impl = fw_impl
+        self.fw_info = fw_info
 
         # Get compare points of two graphs.
         self.compare_points, self.compare_points_name = _get_compare_points(self.graph_quantized)
@@ -89,11 +92,13 @@ class NNVisualizer:
 
         self.quantized_model, _ = self.fw_impl.model_builder(self.graph_quantized,
                                                              mode=ModelBuilderMode.QUANTIZED,
-                                                             append2output=self.compare_points
+                                                             append2output=self.compare_points,
+                                                             fw_info=self.fw_info)
 
         self.float_model, _ = self.fw_impl.model_builder(self.graph_float,
                                                          mode=ModelBuilderMode.FLOAT,
-                                                         append2output=self.compare_points_float
+                                                         append2output=self.compare_points_float,
+                                                         fw_info=self.fw_info)
 
     def has_compare_points(self) -> bool:
         """
model_compression_toolkit/core/common/visualization/tensorboard_writer.py
CHANGED
@@ -89,18 +89,20 @@ class TensorboardWriter(object):
     Class to log events to display using Tensorboard such as graphs, histograms, images, etc.
     """
 
-    def __init__(self, dir_path: str):
+    def __init__(self, dir_path: str, fw_info: FrameworkInfo):
         """
         Initialize a TensorboardWriter object.
 
         Args:
             dir_path: Path to save all events to display on Tensorboard.
+            fw_info: FrameworkInfo object (needed for computing nodes' weights memory).
 
         """
         self.dir_path = dir_path
         # we hold EventWriter per tag name, so events can be gathered by tags (like phases during the quantization
         # process).
         self.tag_name_to_event_writer = {}
+        self.fw_info = fw_info
 
     def close(self):
         """
@@ -207,7 +209,7 @@ class TensorboardWriter(object):
             attr = dict()
             if n.final_activation_quantization_cfg is not None:
                 attr.update(n.final_activation_quantization_cfg.__dict__)
-            elif n.
+            elif n.candidates_quantization_cfg is not None:
                 attr.update(n.get_unified_activation_candidates_dict())
             return attr
 
@@ -229,8 +231,8 @@ class TensorboardWriter(object):
             attr = dict()
             if n.final_weights_quantization_cfg is not None:
                 attr.update(n.final_weights_quantization_cfg.__dict__)
-            elif n.
-                attr.update(n.get_unified_weights_candidates_dict())
+            elif n.candidates_quantization_cfg is not None:
+                attr.update(n.get_unified_weights_candidates_dict(self.fw_info))
             return attr
 
         def __get_node_attr(n: BaseNode) -> Dict[str, Any]:
@@ -294,7 +296,7 @@ class TensorboardWriter(object):
 
             return NodeExecStats(node_name=n.name,
                                  memory=[AllocatorMemoryUsed(
-                                     total_bytes=int(n.get_memory_bytes())
+                                     total_bytes=int(n.get_memory_bytes(self.fw_info))
                                  )])
 
         graph_def = GraphDef()  # GraphDef to add to Tensorboard
@@ -524,12 +526,14 @@ class TensorboardWriter(object):
         er.add_event(event)
         er.flush()
 
-
-def init_tensorboard_writer() -> TensorboardWriter:
+def init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter:
     """
     Create a TensorBoardWriter object initialized with the logger dir path if it was set,
     or None otherwise.
 
+    Args:
+        fw_info: FrameworkInfo object.
+
     Returns:
         A TensorBoardWriter object.
     """
@@ -537,7 +541,7 @@ def init_tensorboard_writer() -> TensorboardWriter:
     if Logger.LOG_PATH is not None:
         tb_log_dir = os.path.join(os.getcwd(), Logger.LOG_PATH, 'tensorboard_logs')
         Logger.info(f'To use Tensorboard, please run: tensorboard --logdir {tb_log_dir}')
-        tb_w = TensorboardWriter(tb_log_dir)
+        tb_w = TensorboardWriter(tb_log_dir, fw_info)
     return tb_w
 
 
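Note: with the restored signature, callers must pass a FrameworkInfo when creating the writer. A hedged usage sketch on the Keras side (both import paths appear elsewhere in this diff, but the snippet itself is illustrative):

    from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
    from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO

    # Returns a TensorboardWriter if Logger.LOG_PATH was set, otherwise None.
    tb_w = init_tensorboard_writer(DEFAULT_KERAS_INFO)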
model_compression_toolkit/core/graph_prep_runner.py
CHANGED
@@ -16,27 +16,28 @@
 
 from typing import Callable, Any
 
+from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfoGenerator
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
 from model_compression_toolkit.core.common.quantization.filter_nodes_candidates import filter_nodes_candidates
-from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG
-    QuantizationErrorMethod
+from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG
 from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
-from model_compression_toolkit.core.common.quantization.set_node_quantization_config import
+from model_compression_toolkit.core.common.quantization.set_node_quantization_config import \
+    set_quantization_configuration_to_graph
 from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
 from model_compression_toolkit.core.common.substitutions.linear_collapsing_substitution import \
     linear_collapsing_substitute
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter
-from model_compression_toolkit.quantization_preparation.load_fqc import load_fqc_configuration
 from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
     FrameworkQuantizationCapabilities
-from model_compression_toolkit.logger import Logger
 
 
 def graph_preparation_runner(in_model: Any,
                              representative_data_gen: Callable,
                              quantization_config: QuantizationConfig,
+                             fw_info: FrameworkInfo,
                              fw_impl: FrameworkImplementation,
                              fqc: FrameworkQuantizationCapabilities,
                              bit_width_config: BitWidthConfig = None,
@@ -55,6 +56,8 @@ def graph_preparation_runner(in_model: Any,
         in_model (Any): Model to quantize.
         representative_data_gen (Callable): Dataset used for calibration.
         quantization_config (QuantizationConfig): QuantizationConfig containing parameters of how the model should be quantized.
+        fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices,
+            groups of layers by how they should be quantized, etc.).
         fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation.
         fqc (FrameworkQuantizationCapabilities): FrameworkQuantizationCapabilities object that models the inference target platform and
             the attached framework operator's information.
@@ -70,6 +73,7 @@ def graph_preparation_runner(in_model: Any,
     graph = read_model_to_graph(in_model,
                                 representative_data_gen,
                                 fqc,
+                                fw_info,
                                 fw_impl)
 
     if tb_w is not None:
@@ -79,6 +83,7 @@ def graph_preparation_runner(in_model: Any,
                                 fqc,
                                 quantization_config,
                                 bit_width_config,
+                                fw_info,
                                 tb_w,
                                 fw_impl,
                                 mixed_precision_enable=mixed_precision_enable,
@@ -91,6 +96,7 @@ def get_finalized_graph(initial_graph: Graph,
                         fqc: FrameworkQuantizationCapabilities,
                         quant_config: QuantizationConfig = DEFAULTCONFIG,
                         bit_width_config: BitWidthConfig = None,
+                        fw_info: FrameworkInfo = None,
                         tb_w: TensorboardWriter = None,
                         fw_impl: FrameworkImplementation = None,
                         mixed_precision_enable: bool = False,
@@ -105,6 +111,8 @@ def get_finalized_graph(initial_graph: Graph,
         quant_config (QuantizationConfig): QuantizationConfig containing parameters of how the model should be
             quantized.
         bit_width_config (BitWidthConfig): Config for bit-width selection. Defaults to None.
+        fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g.,
+            kernel channels indices, groups of layers by how they should be quantized, etc.)
         tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
         fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation.
         mixed_precision_enable: is mixed precision enabled.
@@ -112,17 +120,11 @@ def get_finalized_graph(initial_graph: Graph,
 
     Returns: Graph object that represents the model, after applying all required modifications to it.
     """
-    if quant_config.weights_error_method == QuantizationErrorMethod.HMSE:
-        if not running_gptq:
-            raise ValueError(f"The HMSE error method for parameters selection is only supported when running GPTQ "
-                             f"optimization due to long execution time that is not suitable for basic PTQ.")
-        Logger.warning("Using the HMSE error method for weights quantization parameters search. "
-                       "Note: This method may significantly increase runtime during the parameter search process.")
 
     ######################################
     # Graph substitution (prepare graph)
     ######################################
-    graph = substitute(initial_graph, fw_impl.get_substitutions_prepare_graph())
+    graph = substitute(initial_graph, fw_impl.get_substitutions_prepare_graph(fw_info))
 
     if tb_w is not None:
         tb_w.add_graph(graph, 'after_graph_preparation')
@@ -132,6 +134,7 @@ def get_finalized_graph(initial_graph: Graph,
     ##########################################
     for node in graph.nodes:
         node.prior_info = fw_impl.get_node_prior_info(node=node,
+                                                      fw_info=fw_info,
                                                       graph=graph)
 
     ##################################################
@@ -147,22 +150,28 @@ def get_finalized_graph(initial_graph: Graph,
     if tb_w is not None:
         tb_w.add_graph(transformed_graph, 'pre_statistics_collection_substitutions')
 
-
-
-
-
-
+    ######################################
+    # Add quantization configurations
+    ######################################
+    transformed_graph = set_quantization_configuration_to_graph(graph=transformed_graph,
+                                                                quant_config=quant_config,
+                                                                bit_width_config=bit_width_config,
+                                                                mixed_precision_enable=mixed_precision_enable,
+                                                                running_gptq=running_gptq)
 
-
-
-
-
+    ######################################
+    # Layer fusing
+    ######################################
+    fusing_info = FusingInfoGenerator(fqc.get_fusing_patterns()).generate_fusing_info(transformed_graph)
+    transformed_graph.fusing_info = fusing_info
+    transformed_graph.disable_fused_nodes_activation_quantization()
 
     ######################################
     # Channel equalization
     ######################################
     transformed_graph = substitute(transformed_graph,
-                                   fw_impl.get_substitutions_channel_equalization(quant_config
+                                   fw_impl.get_substitutions_channel_equalization(quant_config,
+                                                                                  fw_info))
 
     if tb_w is not None:
         tb_w.add_graph(transformed_graph, 'after_graph_marking')
@@ -181,6 +190,7 @@ def get_finalized_graph(initial_graph: Graph,
 def read_model_to_graph(in_model: Any,
                         representative_data_gen: Callable,
                         fqc: FrameworkQuantizationCapabilities,
+                        fw_info: FrameworkInfo = None,
                         fw_impl: FrameworkImplementation = None) -> Graph:
 
     """
@@ -191,6 +201,8 @@ def read_model_to_graph(in_model: Any,
         representative_data_gen: Dataset used for calibration.
         fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
             the attached framework operator's information.
+        fw_info: Information needed for quantization about the specific framework (e.g.,
+            kernel channels indices, groups of layers by how they should be quantized, etc.)
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
@@ -198,5 +210,6 @@ def read_model_to_graph(in_model: Any,
     """
     graph = fw_impl.model_reader(in_model,
                                  representative_data_gen)
+    graph.set_fw_info(fw_info)
     graph.set_fqc(fqc)
     return graph
model_compression_toolkit/core/keras/back2framework/float_model_builder.py
CHANGED
@@ -17,6 +17,7 @@ from typing import List
 from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
+from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.core import common
 from tensorflow.python.util.object_identity import Reference as TFReference
 
@@ -28,17 +29,20 @@ class FloatKerasModelBuilder(KerasModelBuilder):
     def __init__(self,
                  graph: common.Graph,
                  append2output=None,
+                 fw_info: FrameworkInfo = DEFAULT_KERAS_INFO,
                  return_float_outputs: bool = False):
         """
 
         Args:
             graph: Graph to build the model from.
             append2output: Nodes to append to model's output.
+            fw_info: Information about the specific framework of the model that is built.
             return_float_outputs: Whether the model returns float tensors or not.
         """
 
         super().__init__(graph,
                          append2output,
+                         fw_info,
                          return_float_outputs)
 
     def _quantize_node_activations(self,