PyPI - mct-nightly - Versions diffs - 2.4.0.20250617.613__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl - Mend

mct-nightly 2.4.0.20250617.613__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py CHANGED Viewed

@@ -32,7 +32,6 @@ from model_compression_toolkit.core.common.substitutions.apply_substitutions imp
 def _collect_and_assign_act_threshold(graph: Graph,
                                       representative_data_gen: Callable,
                                       core_config: CoreConfig,
-                                      fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation):
     """
     Collect statistics after second moment correction and assign new thresholds to activations.
@@ -41,14 +40,12 @@ def _collect_and_assign_act_threshold(graph: Graph,
         representative_data_gen (Callable): Dataset used for calibration.
         core_config (CoreConfig): Configuration object containing parameters of how the model should be
          quantized, including mixed precision parameters.
-        fw_info: FrameworkInfo object with information about the specific framework's model.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
      """
     mi = ModelCollector(graph,
                         fw_impl,
-                        fw_info,
-                        core_config.quantization_config) # Mark points for statistics collection
+                        core_config.quantization_config)  # Mark points for statistics collection
     for _data in tqdm(representative_data_gen()):
         mi.infer(_data)
@@ -63,14 +60,12 @@ def _collect_and_assign_act_threshold(graph: Graph,
 def quantized_model_builder_for_second_moment_correction(graph: common.Graph,
-                                                         fw_info: FrameworkInfo,
                                                          fw_impl: Any):
     """
     Build a framework model from a graph for second moment correction.
     Args:
-        graph: Graph to build the from.
-        fw_info: FrameworkInfo object with information about the specific framework's model.
+        graph: Graph to build from.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
     Returns:
@@ -79,15 +74,13 @@ def quantized_model_builder_for_second_moment_correction(graph: common.Graph,
     quantized_tg = quantize_graph_weights(graph)
     quantized_model, user_info = fw_impl.model_builder(quantized_tg,
-                                                       mode=ModelBuilderMode.FLOAT,
-                                                       fw_info=fw_info)
+                                                       mode=ModelBuilderMode.FLOAT)
     return quantized_model
 def apply_second_moment_correction_to_graph(graph: Graph,
                                             representative_data_gen: Callable,
                                             core_config: CoreConfig,
-                                            fw_info: FrameworkInfo,
                                             fw_impl: FrameworkImplementation) -> Graph:
     """
      Apply second moment correction on graph.
@@ -96,15 +89,14 @@ def apply_second_moment_correction_to_graph(graph: Graph,
         representative_data_gen (Callable): Dataset used for calibration.
         core_config (CoreConfig): Configuration object containing parameters of how the model should be
          quantized, including mixed precision parameters.
-        fw_info: FrameworkInfo object with information about the specific framework's model.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
      Returns:
          Graph after second moment correction.
      """
-    semi_quantized_model = quantized_model_builder_for_second_moment_correction(graph, fw_info, fw_impl)
+    semi_quantized_model = quantized_model_builder_for_second_moment_correction(graph, fw_impl)
     fw_impl.apply_second_moment_correction(semi_quantized_model, core_config, representative_data_gen, graph)
     graph = substitute(graph, fw_impl.get_substitutions_after_second_moment_correction(core_config.quantization_config))
-    _collect_and_assign_act_threshold(graph, representative_data_gen, core_config, fw_info, fw_impl)
+    _collect_and_assign_act_threshold(graph, representative_data_gen, core_config, fw_impl)
     return graph

model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py CHANGED Viewed

@@ -64,7 +64,6 @@ def calculate_bin_centers(bin_edges: np.ndarray) -> np.ndarray:
 def compute_activation_bias_correction(graph: Graph,
                                        quant_config: QuantizationConfig,
-                                       fw_info: FrameworkInfo,
                                        fw_impl: FrameworkImplementation,
                                        linear_node: BaseNode,
                                        prev_node: BaseNode,
@@ -76,7 +75,6 @@ def compute_activation_bias_correction(graph: Graph,
     Args:
         graph: Graph with nodes to compute the activation bias correction for each node's final activation quantization configuration.
         quant_config: QuantizationConfig of how the model should be quantized.
-        fw_info: Framework info like lists of nodes their kernel should quantized.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         linear_node: Node to compute the activation bias correction for.
         prev_node: Node to compute the activation error caused by his activation quantization.
@@ -127,19 +125,18 @@ def compute_activation_bias_correction(graph: Graph,
     if normalized_bias < quant_config.activation_bias_correction_threshold:
         return graph
-    kernel = linear_node.get_weights_by_keys(fw_info.kernel_ops_attributes_mapping.get(linear_node.type)[0])
+    kernel = linear_node.get_weights_by_keys(linear_node.kernel_attr)
     # Compute the activation bias correction by applying the quantization error to the kernel, resulting in an output
     # size matching the number of output channels.
     if kernel is not None:
         # Get the axes that are not the output channel.
-        output_channel_index, input_channel_index = fw_info.kernel_channels_mapping.get(linear_node.type)
         axis_not_output_channel = list(range(len(kernel.shape)))
-        axis_not_output_channel.remove(output_channel_index)
+        axis_not_output_channel.remove(linear_node.channel_axis.output)
         # Special case of depthwise_conv2d in tensorflow, where we have a depth multiplier for the filters.
-        if output_channel_index == input_channel_index:
+        if linear_node.channel_axis.output == linear_node.channel_axis.input:
             axis_not_output_channel.remove(3)  # 3 is the depth multiplier index.
         activation_bias_correction_term = mean_diff * np.sum(kernel, axis=tuple(axis_not_output_channel))
@@ -150,7 +147,6 @@ def compute_activation_bias_correction(graph: Graph,
 def compute_activation_bias_correction_of_graph(graph: Graph,
                                                 quant_config: QuantizationConfig,
-                                                fw_info: FrameworkInfo,
                                                 fw_impl: FrameworkImplementation,
                                                 activation_bias_correction_node_matchers: Callable,
                                                 kernel_size: str) -> Graph:
@@ -160,7 +156,6 @@ def compute_activation_bias_correction_of_graph(graph: Graph,
     Args:
         graph: Graph with nodes to compute the activation bias correction.
         quant_config: QuantizationConfig of how the model should be quantized.
-        fw_info: Framework info like lists of nodes their kernel should quantized.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         activation_bias_correction_node_matchers: Function to match the layers for activation bias correction.
         kernel_size: The framework specific attribute name of the convolution layer's kernel size.
@@ -177,7 +172,6 @@ def compute_activation_bias_correction_of_graph(graph: Graph,
             if prev_node is not None:
                 graph = compute_activation_bias_correction(graph=graph,
                                                            quant_config=quant_config,
-                                                           fw_info=fw_info,
                                                            fw_impl=fw_impl,
                                                            linear_node=n,
                                                            prev_node=prev_node,

model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py CHANGED Viewed

@@ -18,7 +18,6 @@ from typing import Any
 import numpy as np
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common import BaseNode, Graph
 from model_compression_toolkit.core.common.quantization.quantize_node import get_quantized_weights_attr_by_qc
 from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
@@ -26,7 +25,6 @@ from model_compression_toolkit.logger import Logger
 def compute_bias_correction_of_graph(graph: Graph,
-                                     fw_info: FrameworkInfo,
                                      fw_impl: FrameworkImplementation) -> Graph:
     """
     For each node in a graph, and for each candidate weights quantization configuration,
@@ -35,7 +33,6 @@ def compute_bias_correction_of_graph(graph: Graph,
     Args:
         graph: Graph with nodes to compute the bias correction for
         each node's weights quantization configuration candidates.
-        fw_info: Framework info like lists of nodes their kernel should quantized.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
     Returns:
@@ -46,17 +43,15 @@ def compute_bias_correction_of_graph(graph: Graph,
     for n in graph.nodes:
         # Bias correction is computed based on the quantized kernel, so we need to get the specific kernel attribute
         # name out of all the weights attributes of the node.
-        if fw_info.is_kernel_op(n.type):
-            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-            if n.is_weights_quantization_enabled(kernel_attr):
+        if n.is_kernel_op:
+            if n.is_weights_quantization_enabled(n.kernel_attr):
                 # Bias correction is not applied to layers with constant inputs.
                 if n.has_positional_weights:
                     for candidate_qc in n.candidates_quantization_cfg:
                         candidate_qc.weights_quantization_cfg.weights_bias_correction = False
                 else:
                     _compute_bias_correction_per_candidate_qc(n,
-                                                              kernel_attr,
-                                                              fw_info,
+                                                              n.kernel_attr,
                                                               graph.get_in_stats_collector(n),
                                                               fw_impl=fw_impl)
     return graph
@@ -64,7 +59,6 @@ def compute_bias_correction_of_graph(graph: Graph,
 def _compute_bias_correction_per_candidate_qc(node: BaseNode,
                                               kernel_attr: str,
-                                              fw_info: FrameworkInfo,
                                               node_in_stats_collector: BaseStatsCollector,
                                               fw_impl: FrameworkImplementation):
     """
@@ -74,7 +68,6 @@ def _compute_bias_correction_per_candidate_qc(node: BaseNode,
     Args:
         node: Node to compute the bias correction for its different candidates.
         kernel_attr: The name of the kernel attribute of the node.
-        fw_info: Framework info like lists of nodes their kernel should quantized.
         node_in_stats_collector: Statistics collector of the node for the mean per-channel.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.

model_compression_toolkit/core/common/statistics_correction/statistics_correction.py CHANGED Viewed

@@ -32,7 +32,6 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
 def statistics_correction_runner(transformed_graph: Graph,
                                  core_config: CoreConfig,
-                                 fw_info: FrameworkInfo,
                                  fw_impl: FrameworkImplementation,
                                  tb_w: TensorboardWriter = None, ) -> Graph:
     """
@@ -41,7 +40,6 @@ def statistics_correction_runner(transformed_graph: Graph,
         transformed_graph: Graph to add statistics correction.
         core_config (CoreConfig): Configuration object containing parameters of how the model should be
          quantized, including mixed precision parameters.
-        fw_info: FrameworkInfo object with information about the specific framework's model.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
@@ -59,7 +57,6 @@ def statistics_correction_runner(transformed_graph: Graph,
     # Compute bias correction to nodes' config candidates
     ########################################################
     tg_with_bias = compute_bias_correction_of_graph(tg_with_bias,
-                                                    fw_info,
                                                     fw_impl)
     if tb_w is not None:
@@ -71,7 +68,6 @@ def statistics_correction_runner(transformed_graph: Graph,
 def apply_statistics_correction(transformed_graph: Graph,
                                 representative_data_gen: Callable,
                                 core_config: CoreConfig,
-                                fw_info: FrameworkInfo,
                                 fw_impl: FrameworkImplementation,
                                 tb_w: TensorboardWriter = None, ) -> Graph:
     """
@@ -81,7 +77,6 @@ def apply_statistics_correction(transformed_graph: Graph,
         representative_data_gen (Callable): Dataset used for calibration.
         core_config (CoreConfig): Configuration object containing parameters of how the model should be
          quantized, including mixed precision parameters.
-        fw_info: FrameworkInfo object with information about the specific framework's model.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
@@ -94,7 +89,7 @@ def apply_statistics_correction(transformed_graph: Graph,
     #############################################
     if core_config.quantization_config.weights_second_moment_correction:
         transformed_graph = apply_second_moment_correction_to_graph(transformed_graph, representative_data_gen,
-                                                                    core_config, fw_info, fw_impl)
+                                                                    core_config, fw_impl)
     #############################################
     # Apply Bias Correction

model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py CHANGED Viewed

@@ -97,10 +97,9 @@ class BatchNormalizationReconstruction(common.BaseSubstitution):
         # This feature disabled for models with weights quantization method of Power of 2
         for qc in source_node.candidates_quantization_cfg:
             # this feature is relevant only for layers with kernel op
-            kernel_attr = graph.fw_info.get_kernel_op_attributes(source_node.type)
-            if kernel_attr is None:
+            if source_node.kernel_attr is None:
                 Logger.error(f"Can't preform BatchNorm reconstruction on a node {source_node.name} without a kernel op.")
-            if (qc.weights_quantization_cfg.get_attr_config(kernel_attr[0]).weights_quantization_method
+            if (qc.weights_quantization_cfg.get_attr_config(source_node.kernel_attr).weights_quantization_method
                     == QuantizationMethod.POWER_OF_TWO):
                 Logger.warning("Second moment statistics correction feature disabled for models with weights "
                                "quantization method of Power of 2")

model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py CHANGED Viewed

@@ -157,7 +157,7 @@ class BatchNormalizationRefusing(common.BaseSubstitution):
         graph.remove_node(bn_node)
         graph.remove_node(source_node)
-        self._calc_weights_quantization_params(conv_bn, weights_scale, graph.fw_info)
+        self._calc_weights_quantization_params(conv_bn, weights_scale)
         assert num_nodes_before_substitution - len(graph.nodes) == 1
         assert num_edges_before_substitution - len(graph.edges) == 1
@@ -165,18 +165,15 @@ class BatchNormalizationRefusing(common.BaseSubstitution):
     def _calc_weights_quantization_params(self,
                                           conv_bn: BaseNode,
-                                          weights_scale: np.ndarray,
-                                          fw_info):
+                                          weights_scale: np.ndarray):
         """
         Update node weights quantization params.
         Args:
             conv_bn: Convolution node to update the weights quantization params.
             weights_scale: Weight scale factor in which to multiply the conv node's weight.
-            fw_info: FrameworkInfo object with information about the specific framework's model
         """
         # Conv layer is ensured to have a kernel attribute
-        kernel_attr = fw_info.get_kernel_op_attributes(conv_bn.type)[0]
-        conv_bn_kernel_cfg = conv_bn.final_weights_quantization_cfg.get_attr_config(kernel_attr)
+        conv_bn_kernel_cfg = conv_bn.final_weights_quantization_cfg.get_attr_config(conv_bn.kernel_attr)
         # In case of SYMMETRIC weight quantization method, we update the threshold by weights_scale
         if conv_bn_kernel_cfg.weights_quantization_method == QuantizationMethod.SYMMETRIC:
             original_threshold = conv_bn_kernel_cfg.weights_quantization_params[THRESHOLD]

model_compression_toolkit/core/common/substitutions/scale_equalization.py CHANGED Viewed

@@ -20,8 +20,6 @@ import scipy
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
-from model_compression_toolkit.defaultdict import DefaultDict
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
@@ -77,7 +75,6 @@ def fixed_second_moment_after_relu(mu: np.ndarray,
 def scale_reshaping(scale: np.ndarray,
                     op2d: common.BaseNode,
-                    kernel_channel_mapping: DefaultDict,
                     kernel_str: str,
                     in_channels: bool = True) -> np.ndarray:
     """
@@ -89,7 +86,6 @@ def scale_reshaping(scale: np.ndarray,
     Args:
         scale: Scale factor to scale the kernel channels by.
         op2d: Node to scale its kernel.
-        kernel_channel_mapping: Mapping from a layer to a tuple of indices of its output/input kernel channels.
         kernel_str: The framework specific attribute name of the convolution layer's weight/kernel.
         in_channels: Kernel's index of input channels.
@@ -99,12 +95,11 @@ def scale_reshaping(scale: np.ndarray,
     op_ndims = op2d.get_weights_by_keys(kernel_str).ndim
     reshape_target = np.ones(op_ndims, dtype=np.int32)
-    reshape_target[kernel_channel_mapping.get(op2d.type)[int(in_channels)]] = -1
+    reshape_target[op2d.channel_axis.input if in_channels else op2d.channel_axis.output] = -1
     return np.reshape(scale, reshape_target)
-def update_linear_nodes(fw_info: FrameworkInfo,
-                        first_op2d_node: BaseNode,
+def update_linear_nodes(first_op2d_node: BaseNode,
                         second_op2d_node: BaseNode,
                         scale_factor: np.ndarray,
                         kernel_str: str,
@@ -116,7 +111,6 @@ def update_linear_nodes(fw_info: FrameworkInfo,
     The scale factor contain a scale value per-channel.
     Args:
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
         groups of layers by how they should be quantized, etc.)
         first_op2d_node: Node to multiply its kernel by the scale factor.
         second_op2d_node: Node to divide its kernel by the scale factor.
@@ -125,15 +119,12 @@ def update_linear_nodes(fw_info: FrameworkInfo,
         kernel_str: The framework specific attribute name of the convolution layer's weight/kernel.
     """
     w2_fixed = second_op2d_node.get_weights_by_keys(kernel_str) / scale_reshaping(scale_factor,
                                                                                   second_op2d_node,
-                                                                                  fw_info.kernel_channels_mapping,
                                                                                   kernel_str)
     w1_fixed = first_op2d_node.get_weights_by_keys(kernel_str) * scale_reshaping(scale_factor,
                                                                                  first_op2d_node,
-                                                                                 fw_info.kernel_channels_mapping,
                                                                                  kernel_str,
                                                                                  in_channels=False)
@@ -168,8 +159,7 @@ def calculate_scale_correction(first_op2d_node: BaseNode) -> tuple:
     return scale_factor
-def scale_equalization_lnl(fw_info: FrameworkInfo,
-                           first_op2d_node: BaseNode,
+def scale_equalization_lnl(first_op2d_node: BaseNode,
                            second_op2d_node: BaseNode,
                            kernel_str: str,
                            bias_str: str):
@@ -179,7 +169,6 @@ def scale_equalization_lnl(fw_info: FrameworkInfo,
     follows the activation node to get the same expected output without the scaling.
     Args:
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
         groups of layers by how they should be quantized, etc.)
         first_op2d_node: Node to multiply its kernel by the scale factor.
         second_op2d_node: Node to divide its kernel by the scale factor.
@@ -189,8 +178,7 @@ def scale_equalization_lnl(fw_info: FrameworkInfo,
     """
     scale_factor = calculate_scale_correction(first_op2d_node)
-    update_linear_nodes(fw_info,
-                        first_op2d_node,
+    update_linear_nodes(first_op2d_node,
                         second_op2d_node,
                         scale_factor,
                         kernel_str,
@@ -206,7 +194,6 @@ class BaseScaleEqualization(common.BaseSubstitution):
     def __init__(self,
                  quant_config: QuantizationConfig,
-                 fw_info: FrameworkInfo,
                  matcher_instance,
                  kernel_str: str,
                  bias_str: str):
@@ -214,13 +201,11 @@ class BaseScaleEqualization(common.BaseSubstitution):
         Initialize a ScaleEqualization object.
         Args:
             quant_config: QuantizationConfig containing parameters of how the model should be quantized.
-            fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
             groups of layers by how they should be quantized, etc.)
             matcher_instance: Per substitution matcher instance of type WalkMatcher
         """
         self.quant_config = quant_config
-        self.fw_info = fw_info
         self.kernel_str = kernel_str
         self.bias_str = bias_str
         super().__init__(matcher_instance=matcher_instance)
@@ -243,8 +228,7 @@ class BaseScaleEqualization(common.BaseSubstitution):
         act_node = nodes_list[1]
         second_op2d_node = nodes_list[-1]
         if first_op2d_node.prior_info.std_output is not None and act_node.is_activation_quantization_enabled():
-            scale_equalization_lnl(self.fw_info,
-                                   first_op2d_node,
+            scale_equalization_lnl(first_op2d_node,
                                    second_op2d_node,
                                    self.kernel_str,
                                    self.bias_str)

model_compression_toolkit/core/common/substitutions/shift_negative_activation.py CHANGED Viewed

@@ -46,7 +46,6 @@ If the linear node pads the input tensor with zeros, we modify the padded value
 def op2d_bias_correction(op2d_node: BaseNode,
                          shift_to_correct: float,
-                         fw_info: FrameworkInfo,
                          bias_str: str,
                          bias_flag_str: str):
     """
@@ -57,7 +56,6 @@ def op2d_bias_correction(op2d_node: BaseNode,
         op2d_node: Node to compute its bias correction term.
         shift_to_correct: Value that was used to shift the output tensor of
         the non-linear node.
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
         bias_str:
         bias_flag_str: The framework specific attribute name of the bias flag.
     """
@@ -76,14 +74,13 @@ def op2d_bias_correction(op2d_node: BaseNode,
     # Each node adds a different noise due to the shifting. It depends on the
     # dimensions of the kernel, thus the correction term is a function of
     # the layer type.
-    kernel = op2d_node.get_weights_by_keys(fw_info.kernel_ops_attributes_mapping.get(op2d_node.type)[0])
+    kernel = op2d_node.get_weights_by_keys(op2d_node.kernel_attr)
     if kernel is not None:
-        output_channel_index, input_channel_index = fw_info.kernel_channels_mapping.get(op2d_node.type)
         axis_not_output_channel = list(range(len(kernel.shape)))
-        axis_not_output_channel.remove(output_channel_index)
+        axis_not_output_channel.remove(op2d_node.channel_axis.output)
         # special case of depthwise_conv2d in tensorflow, where we have a depth multiplier for the filters
-        if output_channel_index == input_channel_index:
+        if op2d_node.channel_axis.output == op2d_node.channel_axis.input:
             axis_not_output_channel.remove(3)  # 3 is the depth multiplier index
         bias_correction = shift_to_correct * np.sum(kernel, axis=tuple(axis_not_output_channel))
@@ -250,7 +247,6 @@ def shift_negative_function(graph: Graph,
                             core_config: CoreConfig,
                             non_linear_node: BaseNode,
                             op2d_node: BaseNode,
-                            fw_info: FrameworkInfo,
                             create_add_node: Callable,
                             get_padding_values: Callable,
                             create_pad_node: Callable,
@@ -276,8 +272,6 @@ def shift_negative_function(graph: Graph,
         non_linear_node: Non-linear node with negative values to shift.
         op2d_node: Linear node to correct its bias to overcome the expected error due to
         the shifting.
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
-        groups of layers by how they should be quantized, etc.)
         create_add_node: Function to create an add node.
         get_padding_values: Function to compute the op2d node's padding values
         create_pad_node: Function to create an pad node.
@@ -299,7 +293,6 @@ def shift_negative_function(graph: Graph,
     # all candidates have same activation config, so taking the first candidate for calculations
     non_linear_node_cfg_candidate = non_linear_node.candidates_quantization_cfg[0].activation_quantization_cfg
     # get the non-linear activation threshold
     activation_threshold = non_linear_node_cfg_candidate.activation_quantization_params.get(THRESHOLD)
@@ -390,7 +383,6 @@ def shift_negative_function(graph: Graph,
                            first_node=non_linear_node)
     op2d_bias_correction(op2d_node,
                          shift_value,
-                         fw_info,
                          bias_str,
                          bias_flag_str)
@@ -401,8 +393,7 @@ def shift_negative_function(graph: Graph,
     graph.set_out_stats_collector_to_node(add_node, add_node_stats_collector)
     graph.shift_stats_collector(add_node, np.array(shift_value))
-    set_quantization_configs_to_node(fw_info=fw_info,
-                                     node=add_node,
+    set_quantization_configs_to_node(node=add_node,
                                      graph=graph,
                                      quant_config=core_config.quantization_config,
                                      fqc=graph.fqc,
@@ -428,8 +419,7 @@ def shift_negative_function(graph: Graph,
                                 last_node=op2d_node)
         # Set quantization configuration to node, even though we do not quantize it:
-        set_quantization_configs_to_node(fw_info=fw_info,
-                                         node=pad_node,
+        set_quantization_configs_to_node(node=pad_node,
                                          graph=graph,
                                          quant_config=core_config.quantization_config,
                                          fqc=graph.fqc,
@@ -472,7 +462,6 @@ def shift_negative_function(graph: Graph,
             candidate_qc.weights_quantization_cfg.get_attr_config(attr).enable_weights_quantization = False
         candidate_qc.activation_quantization_cfg = create_node_activation_qc(core_config.quantization_config,
-                                                                             fw_info,
                                                                              add_node_qco[op_qc_idx])
         candidate_qc.activation_quantization_cfg.set_activation_quantization_param({THRESHOLD: activation_threshold,
@@ -573,7 +562,6 @@ def get_next_nodes_to_correct(n: BaseNode,
 def apply_shift_negative_correction(graph: Graph,
                                     core_config: CoreConfig,
-                                    fw_info: FrameworkInfo,
                                     snc_node_types: NodeOperationMatcher,
                                     linear_node_types: NodeOperationMatcher,
                                     bypass_node_types: NodeOperationMatcher,
@@ -593,7 +581,6 @@ def apply_shift_negative_correction(graph: Graph,
     Args:
         graph: Graph to apply the substitution on.
         core_config: Quantization configuration to build the substitutions list according to.
-        fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
         groups of layers by how they should be quantized, etc.)
         snc_node_types: Types of activation nodes with negative outputs to consider.
         linear_node_types: Types of linear nodes to consider.
@@ -632,7 +619,6 @@ def apply_shift_negative_correction(graph: Graph,
                                                 core_config,
                                                 n,
                                                 linear_node,
-                                                fw_info,
                                                 create_add_node,
                                                 get_padding_values,
                                                 create_pad_node,

model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py CHANGED Viewed

@@ -50,9 +50,7 @@ class BaseVirtualActivationWeightsComposition(BaseSubstitution):
             return graph
         # Virtual composed activation-weights node
-        v_node = VirtualActivationWeightsNode(act_node,
-                                              weights_node,
-                                              fw_info=graph.fw_info)
+        v_node = VirtualActivationWeightsNode(act_node, weights_node)
         # Update graph
         graph.add_node(v_node)

model_compression_toolkit/core/common/substitutions/weights_activation_split.py CHANGED Viewed

@@ -50,7 +50,7 @@ class BaseWeightsActivationSplit(BaseSubstitution):
             Graph after applying the substitution.
         """
         # The decomposition works on linear nodes, that is, nodes with kernel ops
-        kernel_attr = graph.fw_info.get_kernel_op_attributes(node.type)[0]
+        kernel_attr = node.kernel_attr
         if kernel_attr is None:
             Logger.critical(f"Trying to split node weights and activation, but node "
                             f"{node.name} doesn't have a kernel attribute.")

model_compression_toolkit/core/common/visualization/nn_visualizer.py CHANGED Viewed

@@ -59,22 +59,19 @@ class NNVisualizer:
     def __init__(self,
                  graph_float: Graph,
                  graph_quantized: Graph,
-                 fw_impl: FrameworkImplementation,
-                 fw_info: FrameworkInfo):
+                 fw_impl: FrameworkImplementation):
         """
         Initialize a NNVisualizer object.
         Args:
             graph_float: Float version of the graph.
             graph_quantized: Quantized version of the graph.
             fw_impl: Framework implementation with framework-specific methods implementation.
-            fw_info: Framework info with framework-specific information.
         """
         self.graph_float = graph_float
         self.graph_quantized = graph_quantized
         self.fw_impl = fw_impl
-        self.fw_info = fw_info
         # Get compare points of two graphs.
         self.compare_points, self.compare_points_name = _get_compare_points(self.graph_quantized)
@@ -92,13 +89,11 @@ class NNVisualizer:
         self.quantized_model, _ = self.fw_impl.model_builder(self.graph_quantized,
                                                              mode=ModelBuilderMode.QUANTIZED,
-                                                             append2output=self.compare_points,
-                                                             fw_info=self.fw_info)
+                                                             append2output=self.compare_points)
         self.float_model, _ = self.fw_impl.model_builder(self.graph_float,
                                                          mode=ModelBuilderMode.FLOAT,
-                                                         append2output=self.compare_points_float,
-                                                         fw_info=self.fw_info)
+                                                         append2output=self.compare_points_float)
     def has_compare_points(self) -> bool:
         """

model_compression_toolkit/core/common/visualization/tensorboard_writer.py CHANGED Viewed

@@ -89,20 +89,18 @@ class TensorboardWriter(object):
     Class to log events to display using Tensorboard such as graphs, histograms, images, etc.
     """
-    def __init__(self, dir_path: str, fw_info: FrameworkInfo):
+    def __init__(self, dir_path: str):
         """
         Initialize a TensorboardWriter object.
         Args:
             dir_path: Path to save all events to display on Tensorboard.
-            fw_info: FrameworkInfo object (needed for computing nodes' weights memory).
         """
         self.dir_path = dir_path
         # we hold EventWriter per tag name, so events can be gathered by tags (like phases during the quantization
         # process).
         self.tag_name_to_event_writer = {}
-        self.fw_info = fw_info
     def close(self):
         """
@@ -232,7 +230,7 @@ class TensorboardWriter(object):
             if n.final_weights_quantization_cfg is not None:
                 attr.update(n.final_weights_quantization_cfg.__dict__)
             elif n.candidates_quantization_cfg is not None:
-                attr.update(n.get_unified_weights_candidates_dict(self.fw_info))
+                attr.update(n.get_unified_weights_candidates_dict())
             return attr
         def __get_node_attr(n: BaseNode) -> Dict[str, Any]:
@@ -296,7 +294,7 @@ class TensorboardWriter(object):
             return NodeExecStats(node_name=n.name,
                                  memory=[AllocatorMemoryUsed(
-                                     total_bytes=int(n.get_memory_bytes(self.fw_info))
+                                     total_bytes=int(n.get_memory_bytes())
                                  )])
         graph_def = GraphDef()  # GraphDef to add to Tensorboard
@@ -526,13 +524,13 @@ class TensorboardWriter(object):
         er.add_event(event)
         er.flush()
-def init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter:
+def init_tensorboard_writer() -> TensorboardWriter:
     """
     Create a TensorBoardWriter object initialized with the logger dir path if it was set,
     or None otherwise.
     Args:
-        fw_info: FrameworkInfo object.
     Returns:
         A TensorBoardWriter object.
@@ -541,7 +539,7 @@ def init_tensorboard_writer(fw_info: FrameworkInfo) -> TensorboardWriter:
     if Logger.LOG_PATH is not None:
         tb_log_dir = os.path.join(os.getcwd(), Logger.LOG_PATH, 'tensorboard_logs')
         Logger.info(f'To use Tensorboard, please run: tensorboard --logdir {tb_log_dir}')
-        tb_w = TensorboardWriter(tb_log_dir, fw_info)
+        tb_w = TensorboardWriter(tb_log_dir)
     return tb_w

mct-nightly 2.4.0.20250617.613__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl

mct-nightly 2.4.0.20250617.613__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl