mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/METADATA +6 -3
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/RECORD +165 -159
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/analyzer.py +5 -2
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +4 -0
- model_compression_toolkit/core/common/collectors/base_collector.py +1 -4
- model_compression_toolkit/core/common/collectors/mean_collector.py +4 -7
- model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +4 -7
- model_compression_toolkit/core/common/framework_implementation.py +22 -10
- model_compression_toolkit/core/common/framework_info.py +83 -93
- model_compression_toolkit/core/common/fusion/graph_fuser.py +9 -12
- model_compression_toolkit/core/common/graph/base_graph.py +72 -45
- model_compression_toolkit/core/common/graph/base_node.py +141 -121
- model_compression_toolkit/core/common/graph/functional_node.py +2 -19
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +21 -17
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +18 -8
- model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +9 -14
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +21 -12
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +3 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +6 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +10 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +9 -4
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +7 -2
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -7
- model_compression_toolkit/core/common/model_collector.py +18 -22
- model_compression_toolkit/core/common/model_validation.py +44 -0
- model_compression_toolkit/core/common/network_editors/__init__.py +1 -8
- model_compression_toolkit/core/common/network_editors/actions.py +130 -14
- model_compression_toolkit/core/common/network_editors/edit_network.py +4 -1
- model_compression_toolkit/core/common/pruning/channels_grouping.py +5 -1
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +6 -0
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +15 -5
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +7 -3
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +4 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +13 -5
- model_compression_toolkit/core/common/pruning/prune_graph.py +4 -1
- model_compression_toolkit/core/common/pruning/pruner.py +6 -1
- model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +13 -5
- model_compression_toolkit/core/common/pruning/pruning_section.py +18 -9
- model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +55 -116
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +14 -20
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +228 -43
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -0
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -21
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +78 -0
- model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +5 -8
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -91
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +66 -36
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +32 -61
- model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +412 -93
- model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +7 -3
- model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +19 -6
- model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +19 -11
- model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +15 -15
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +20 -4
- model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +9 -4
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +12 -8
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +6 -3
- model_compression_toolkit/core/common/substitutions/scale_equalization.py +21 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +55 -43
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +3 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/visualization/nn_visualizer.py +8 -3
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +12 -8
- model_compression_toolkit/core/graph_prep_runner.py +35 -22
- model_compression_toolkit/core/keras/back2framework/float_model_builder.py +4 -0
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -0
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +15 -8
- model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/keras/default_framework_info.py +91 -131
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +18 -29
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +16 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +5 -4
- model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +13 -3
- model_compression_toolkit/core/keras/keras_implementation.py +37 -17
- model_compression_toolkit/core/keras/keras_model_validation.py +38 -0
- model_compression_toolkit/core/keras/keras_node_prior_info.py +13 -4
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +34 -19
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +12 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +16 -9
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +5 -1
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/pytorch/default_framework_info.py +79 -93
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +4 -3
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +8 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +4 -3
- model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +12 -3
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +41 -24
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +33 -13
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +5 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/quantization_prep_runner.py +11 -6
- model_compression_toolkit/core/runner.py +15 -5
- model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
- model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +1 -0
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +9 -13
- model_compression_toolkit/gptq/common/gptq_graph.py +11 -5
- model_compression_toolkit/gptq/common/gptq_training.py +8 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +9 -3
- model_compression_toolkit/gptq/keras/graph_info.py +6 -4
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -4
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +9 -3
- model_compression_toolkit/gptq/pytorch/graph_info.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +7 -5
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/runner.py +7 -1
- model_compression_toolkit/pruning/keras/pruning_facade.py +12 -7
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +8 -4
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -5
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -4
- model_compression_toolkit/ptq/runner.py +4 -1
- model_compression_toolkit/qat/common/qat_config.py +6 -2
- model_compression_toolkit/qat/keras/quantization_facade.py +13 -7
- model_compression_toolkit/qat/pytorch/quantization_facade.py +11 -7
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py +3 -3
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +2 -0
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +6 -0
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +4 -2
- model_compression_toolkit/xquant/__init__.py +1 -0
- model_compression_toolkit/xquant/common/constants.py +1 -0
- model_compression_toolkit/xquant/common/model_folding_utils.py +6 -1
- model_compression_toolkit/xquant/common/tensorboard_utils.py +4 -1
- model_compression_toolkit/xquant/common/xquant_config.py +27 -1
- model_compression_toolkit/xquant/{common → keras}/core_report_generator.py +2 -2
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +1 -1
- model_compression_toolkit/xquant/{common → keras}/framework_report_utils.py +23 -2
- model_compression_toolkit/xquant/keras/keras_report_utils.py +10 -5
- model_compression_toolkit/xquant/keras/similarity_calculator.py +199 -0
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +3 -0
- model_compression_toolkit/xquant/pytorch/core_detect_degrade_layer.py +77 -0
- model_compression_toolkit/xquant/pytorch/core_judge_troubleshoot.py +66 -0
- model_compression_toolkit/xquant/pytorch/core_report_generator.py +177 -0
- model_compression_toolkit/xquant/pytorch/detect_degrade_utils.py +78 -0
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +41 -1
- model_compression_toolkit/xquant/pytorch/framework_report_utils.py +98 -0
- model_compression_toolkit/xquant/pytorch/judge_troubleshoot_utils.py +562 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +10 -7
- model_compression_toolkit/xquant/{common → pytorch}/similarity_calculator.py +6 -1
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +3 -0
- model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +0 -47
- model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +0 -45
- model_compression_toolkit/quantization_preparation/__init__.py +0 -14
- model_compression_toolkit/quantization_preparation/load_fqc.py +0 -223
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/lut_fake_quant.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/lut_fake_quant.py +0 -0
model_compression_toolkit/core/common/graph/base_node.py

@@ -14,37 +14,31 @@
 # ==============================================================================
 
 import copy
-from typing import Dict, Any, NamedTuple, Tuple, List, Type, Union
+from typing import Dict, Any, Tuple, List, Type, Union
 
 import numpy as np
 
-from model_compression_toolkit.core.common.framework_info import get_fw_info, ChannelAxisMapping
 from model_compression_toolkit.constants import WEIGHTS_NBITS_ATTRIBUTE, CORRECTED_BIAS_ATTRIBUTE, \
     ACTIVATION_N_BITS_ATTRIBUTE, FP32_BYTES_PER_PARAMETER
-from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import NodeQuantizationConfig
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig, \
     ActivationQuantizationMode
 from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions, \
+    OpQuantizationConfig
+from model_compression_toolkit.target_platform_capabilities.schema.schema_functions import max_input_activation_n_bits
 from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import LayerFilterParams
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
+    FrameworkQuantizationCapabilities
 
 
 WeightAttrT = Union[str, int]
 
 
-class NodeFrameworkInfo(NamedTuple):
-    """
-    Node's specific framework information.
-    """
-    channel_axis: ChannelAxisMapping
-    out_channel_axis: int
-    minmax: Tuple[float, float]
-    kernel_attr: str
-
-
 class BaseNode:
     """
     Class to represent a node in a graph that represents the model.
     """
+
     def __init__(self,
                  name: str,
                  framework_attr: Dict[str, Any],
@@ -90,84 +84,28 @@ class BaseNode:
         self.inputs_as_list = inputs_as_list
         self.final_weights_quantization_cfg = None
         self.final_activation_quantization_cfg = None
-        self.quantization_cfg: NodeQuantizationConfig = None
+        self.candidates_quantization_cfg = None
         self.prior_info = None
         self.has_activation = has_activation
         self.is_custom = is_custom
-        self.node_fw_info = self._get_fw_node_attrs(layer_class, framework_attr)
-
-    def _get_fw_node_attrs(self, node_type, framework_attr):
-        fw_info = get_fw_info()
-        return None if fw_info is None else NodeFrameworkInfo(
-            fw_info.get_kernel_channels(node_type),
-            fw_info.get_out_channel_axis(node_type),
-            fw_info.get_layer_min_max(node_type, framework_attr),
-            fw_info.get_kernel_op_attribute(node_type),
-        )
-
-    def _assert_fw_info_exists(self):
-        """
-        Verify NodeFrameworkInfo was initialized.
-        """
-        assert self.node_fw_info is not None, f"NodeFrameworkInfo not initialized for node {self.name}"  # pragma: no cover
-
-    @property
-    def channel_axis(self) -> ChannelAxisMapping:
-        """
-        Extract channels axis from node's NodeFrameworkInfo.
-
-        Returns:
-            Channels axis named tuple.
-        """
-        self._assert_fw_info_exists()
-        return self.node_fw_info.channel_axis
-
-    @property
-    def out_channel_axis(self) -> int:
-        """
-        Extract output channel axis from node's NodeFrameworkInfo.
-
-        Returns:
-            Output channel axis.
-        """
-        self._assert_fw_info_exists()
-        return self.node_fw_info.out_channel_axis
 
     @property
-    def minmax(self) -> Tuple[float, float]:
+    def type(self):
         """
-
-
+        A function to get the node's layer_class op for convenient comparison
         Returns:
-
-        """
-        self._assert_fw_info_exists()
-        return self.node_fw_info.minmax
-
-    @property
-    def kernel_attr(self) -> str:
+            the node's layer_class
         """
-
+        return self.layer_class
 
-        Returns:
-            Kernel name.
+    def get_has_activation(self):
         """
-        self._assert_fw_info_exists()
-        return self.node_fw_info.kernel_attr
+        Returns has_activation attribute.
 
-    @property
-    def candidates_quantization_cfg(self):
-        assert self.quantization_cfg
-        return self.quantization_cfg.candidates_quantization_cfg
+        Returns: Whether the node has activation to quantize.
 
-    @property
-    def type(self):
-        """
-        A function to get the node's layer_class op for convenient comparison
-        Returns:
-            the node's layer_class
         """
-        return self.layer_class
+        return self.has_activation
 
     @property
     def has_positional_weights(self):
@@ -195,31 +133,19 @@ class BaseNode:
         Returns: Whether node activation quantization is enabled or not.
         """
         return self._is_single_quant_mode(ActivationQuantizationMode.QUANT)
-
-    def is_fln_no_quantization(self) -> bool:
+
+    def is_fln_quantization(self) -> bool:
         """
-        Returns: Whether node is FLN
+        Returns: Whether the node's activation quantization is FLN
         """
-        return self._is_single_quant_mode(ActivationQuantizationMode.FLN_NO_QUANT)
-
+        return self._is_single_quant_mode(ActivationQuantizationMode.FLN_QUANT)
+
     def is_quantization_preserving(self) -> bool:
         """
         Returns: Whether node activation quantization information is preserved from its inputs.
         """
         return self._is_single_quant_mode(ActivationQuantizationMode.PRESERVE_QUANT)
 
-    def is_no_quantization(self) -> bool:
-        """
-        Returns: Whether node is no quantization.
-        """
-        return self._is_single_quant_mode(ActivationQuantizationMode.NO_QUANT)
-
-    def is_fln_quantization(self) -> bool:
-        """
-        Returns: Whether the node's activation quantization is FLN
-        """
-        return self._is_single_quant_mode(ActivationQuantizationMode.FLN_QUANT)
-
     def is_weights_quantization_enabled(self, attr_name: str) -> bool:
         """
         Checks whether a node's weights attribute quantization is enabled.
@@ -372,11 +298,14 @@ class BaseNode:
 
         return input_tensors
 
-    def get_num_parameters(self) -> Tuple[int,int]:
+    def get_num_parameters(self, fw_info) -> Tuple[int,int]:
         """
         Compute the number of parameters the node holds.
         It returns a tuple: Number of quantized parameters, number of float parameters.
 
+        Args:
+            fw_info: Framework info to decide which attributes should be quantized.
+
         Returns:
             A tuple of (Number of quantized parameters, number of float parameters).
 
@@ -385,10 +314,11 @@ class BaseNode:
 
         q_node_num_params = 0
 
-
-
-
-
+        for attr in fw_info.get_kernel_op_attributes(self.type):
+            if attr is not None:
+                w = self.get_weights_by_keys(attr)
+                if w is not None:
+                    q_node_num_params += w.flatten().shape[0]
 
         f_node_num_params = total_node_params - q_node_num_params
 
@@ -396,19 +326,22 @@ class BaseNode:
         assert int(f_node_num_params) == f_node_num_params
         return int(q_node_num_params), int(f_node_num_params)
 
-    def get_memory_bytes(self) -> float:
+    def get_memory_bytes(self, fw_info) -> float:
         """
         Compute the number of bytes the node's memory requires.
 
+        Args:
+            fw_info: Framework info to decide which attributes should be quantized.
+
         Returns: Number of bytes the node's memory requires.
 
         """
         # TODO: this method is used for tensorboard only. If we want to enable logging of other attributes memory
         # then it needs to be modified. But, it might be better to remove this method from the BaseNode completely.
-        kernel_attr = self.kernel_attr
+        kernel_attr = fw_info.get_kernel_op_attributes(self.type)[0]
         if kernel_attr is None:
             return 0
-        q_params, f_params = self.get_num_parameters()
+        q_params, f_params = self.get_num_parameters(fw_info)
         if self.final_weights_quantization_cfg is None: # float coefficients
             memory = (f_params+q_params) * FP32_BYTES_PER_PARAMETER
         else:
@@ -418,12 +351,15 @@ class BaseNode:
 
         return memory
 
-    def get_unified_weights_candidates_dict(self) -> Dict[str, Any]:
+    def get_unified_weights_candidates_dict(self, fw_info) -> Dict[str, Any]:
         """
         In Mixed-Precision, a node's kernel can have multiple candidates for weights quantization configuration.
         In order to display a single view of a node (for example, for logging in TensorBoard) we need a way
         to create a single dictionary from all candidates.
-        This method is aimed to build such
+        This method is aimed to build such an unified dictionary for a node.
+
+        Args:
+            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
 
         Returns: A dictionary containing information from node's weight quantization configuration candidates.
 
@@ -433,7 +369,7 @@ class BaseNode:
         # We assume that only the kernel attribute have more than one candidate, since we only allow to
         # quantize the kernel using mixed precision
         # TODO: need to modify if we want to present a unified config for other attributes
-        kernel_attr = self.kernel_attr
+        kernel_attr = fw_info.get_kernel_op_attributes(self.type)[0]
         if kernel_attr is None:
             # This node doesn't have a kernel attribute
             return {}
@@ -501,13 +437,20 @@ class BaseNode:
         candidates = self.get_all_weights_attr_candidates(attr)
         return all(candidate == candidates[0] for candidate in candidates[1:])
 
-    def has_kernel_weight_to_quantize(self):
+    def has_kernel_weight_to_quantize(self, fw_info):
         """
-        Checks whether the node has kernel attribute that need to be quantized according to the
+        Checks whether the node has kernel attribute that need to be quantized according to the framework info.
+
+        Args:
+            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
 
-        Returns: Whether the node
+        Returns: Whether the node has weights that need to be quantized.
         """
-
+        attrs = fw_info.get_kernel_op_attributes(self.type)
+        for attr in attrs:
+            if attr and self.get_weights_by_keys(attr) is not None:
+                return True
+        return False
 
     def has_any_weight_attr_to_quantize(self) -> bool:
         """
@@ -625,9 +568,8 @@ class BaseNode:
         Returns: True if the node has at list one quantization configuration candidate with activation quantization enabled.
         """
 
-        return
-
-                    for c in self.candidates_quantization_cfg]))
+        return len(self.candidates_quantization_cfg) > 0 and \
+               any([c.activation_quantization_cfg.enable_activation_quantization for c in self.candidates_quantization_cfg])
 
     def get_all_weights_attr_candidates(self, attr: str) -> List[WeightsAttrQuantizationConfig]:
         """
@@ -643,6 +585,79 @@ class BaseNode:
         # the inner method would log an exception.
         return [c.weights_quantization_cfg.get_attr_config(attr) for c in self.candidates_quantization_cfg]
 
+    def get_qco(self, fqc: FrameworkQuantizationCapabilities) -> QuantizationConfigOptions:
+        """
+        Get the QuantizationConfigOptions of the node according
+        to the mappings from layers/LayerFilterParams to the OperatorsSet in the TargetPlatformCapabilities.
+
+        Args:
+            fqc: FQC to extract the QuantizationConfigOptions for the node.
+
+        Returns:
+            QuantizationConfigOptions of the node.
+        """
+
+        if fqc is None:
+            Logger.critical(f'Can not retrieve QC options for None FQC')  # pragma: no cover
+
+        for fl, qco in fqc.filterlayer2qco.items():
+            if self.is_match_filter_params(fl):
+                return qco
+        # Extract qco with is_match_type to overcome mismatch of function types in TF 2.15
+        matching_qcos = [_qco for _type, _qco in fqc.layer2qco.items() if self.is_match_type(_type)]
+        if matching_qcos:
+            if all([_qco == matching_qcos[0] for _qco in matching_qcos]):
+                return matching_qcos[0]
+            else:
+                Logger.critical(f"Found duplicate qco types for node '{self.name}' of type '{self.type}'!")  # pragma: no cover
+        return fqc.tpc.default_qco
+
+    def filter_node_qco_by_graph(self, fqc: FrameworkQuantizationCapabilities,
+                                 next_nodes: List, node_qc_options: QuantizationConfigOptions
+                                 ) -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig]]:
+        """
+        Filter quantization config options that don't match the graph.
+        A node may have several quantization config options with 'activation_n_bits' values, and
+        the next nodes in the graph may support different bit-width as input activation. This function
+        filters out quantization config that don't comply to these attributes.
+
+        Args:
+            fqc: FQC to extract the QuantizationConfigOptions for the next nodes.
+            next_nodes: Output nodes of current node.
+            node_qc_options: Node's QuantizationConfigOptions.
+
+        Returns:
+
+        """
+        # Filter quantization config options that don't match the graph.
+        _base_config = node_qc_options.base_config
+        _node_qc_options = node_qc_options.quantization_configurations
+        if len(next_nodes):
+            next_nodes_qc_options = [_node.get_qco(fqc) for _node in next_nodes]
+            next_nodes_supported_input_bitwidth = min([max_input_activation_n_bits(op_cfg)
+                                                       for qc_opts in next_nodes_qc_options
+                                                       for op_cfg in qc_opts.quantization_configurations])
+
+            # Filter node's QC options that match next nodes input bit-width.
+            _node_qc_options = [_option for _option in _node_qc_options
+                                if _option.activation_n_bits <= next_nodes_supported_input_bitwidth]
+            if len(_node_qc_options) == 0:
+                Logger.critical(f"Graph doesn't match FQC bit configurations: {self} -> {next_nodes}.")  # pragma: no cover
+
+            # Verify base config match
+            if any([node_qc_options.base_config.activation_n_bits > max_input_activation_n_bits(qc_opt.base_config)
+                    for qc_opt in next_nodes_qc_options]):
+                # base_config activation bits doesn't match next node supported input bit-width -> replace with
+                # a qco from quantization_configurations with maximum activation bit-width.
+                if len(_node_qc_options) > 0:
+                    output_act_bitwidth = {qco.activation_n_bits: i for i, qco in enumerate(_node_qc_options)}
+                    _base_config = _node_qc_options[output_act_bitwidth[max(output_act_bitwidth)]]
+                    Logger.warning(f"Node {self} base quantization config changed to match Graph and FQC configuration.\nCause: {self} -> {next_nodes}.")
+                else:
+                    Logger.critical(f"Graph doesn't match FQC bit configurations: {self} -> {next_nodes}.")  # pragma: no cover
+
+        return _base_config, _node_qc_options
+
     def is_match_type(self, _type: Type) -> bool:
         """
         Check if input type matches the node type, either in instance type or in type name.
@@ -675,7 +690,7 @@ class BaseNode:
             return False
 
         # Get attributes from node to filter
-        layer_config = self.framework_attr
+        layer_config = self.framework_attr
         if hasattr(self, "op_call_kwargs"):
             layer_config.update(self.op_call_kwargs)
 
@@ -709,7 +724,7 @@ class BaseNode:
             Logger.critical(f"SIMD is expected to be a non-positive integer but found: {_simd}")
         return _simd
 
-    def sort_node_candidates(self):
+    def sort_node_candidates(self, fw_info):
         """
         Sorts the node candidates.
         We assume that the candidates are ordered in the following way (for mixed precision purposes):
@@ -718,12 +733,17 @@ class BaseNode:
         - If the node doesn't have a kernel we only consider the candidate activation number of bits to sort
           the candidates in descending order.
         The operation is done inplace.
+
+        Args:
+            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
+
         """
-        if self.
-
-
-
+        if self.candidates_quantization_cfg is not None:
+            kernel_attr = fw_info.get_kernel_op_attributes(self.type)[0]
+            if kernel_attr is not None:
+                self.candidates_quantization_cfg.sort(
+                    key=lambda c: (c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits,
                                    c.activation_quantization_cfg.activation_n_bits), reverse=True)
             else:
-                self.
-
+                self.candidates_quantization_cfg.sort(key=lambda c: c.activation_quantization_cfg.activation_n_bits,
+                                                      reverse=True)
model_compression_toolkit/core/common/graph/functional_node.py

@@ -1,21 +1,6 @@
-# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
 from typing import Dict, Any, Tuple, Type, List, Union
 
-from model_compression_toolkit.
+from model_compression_toolkit.verify_packages import FOUND_TF
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 import numpy as np
 
@@ -60,7 +45,6 @@ class FunctionalNode(BaseNode):
         inputs_as_list: Whether to pass the node its input tensors as a list or not when calling the layer.
         has_activation: Whether the node has activations that we might want to quantize.
         tensor_input_allocs: A list of indices and strings for allocations input tensors in the node's args and kwargs.
-
         """
 
         super().__init__(name,
@@ -79,7 +63,6 @@ class FunctionalNode(BaseNode):
         self.op_call_args = list(op_call_args)
         self.functional_op = functional_op
         self.tensor_input_allocs = [] if tensor_input_allocs is None else tensor_input_allocs
-        self.node_fw_info = self._get_fw_node_attrs(functional_op, framework_attr)
 
     @property
     def type(self):
@@ -103,4 +86,4 @@ class FunctionalNode(BaseNode):
 
         """
         names_match = _type.__name__ == self.type.__name__
-        return
+        return super().is_match_type(_type) or names_match
model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py

@@ -15,11 +15,13 @@
 import abc
 import uuid
 
+from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.constants import VIRTUAL_ACTIVATION_WEIGHTS_NODE_PREFIX, \
     VIRTUAL_WEIGHTS_SUFFIX, VIRTUAL_ACTIVATION_SUFFIX, FLOAT_BITWIDTH
+from model_compression_toolkit.core.common.framework_info import DEFAULT_KERNEL_ATTRIBUTES
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
-    CandidateNodeQuantizationConfig
+    CandidateNodeQuantizationConfig
 from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
 
 
@@ -75,11 +77,8 @@ class VirtualSplitWeightsNode(VirtualSplitNode):
 
         self.name = origin_node.name + VIRTUAL_WEIGHTS_SUFFIX
 
-        self.quantization_cfg = NodeQuantizationConfig(
-            candidates_quantization_cfg=origin_node.get_unique_weights_candidates(kernel_attr),
-            base_quantization_cfg=None, validate=False
-        )
-        for c in self.quantization_cfg.candidates_quantization_cfg:
+        self.candidates_quantization_cfg = origin_node.get_unique_weights_candidates(kernel_attr)
+        for c in self.candidates_quantization_cfg:
             c.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.NO_QUANT
             c.activation_quantization_cfg.activation_n_bits = FLOAT_BITWIDTH
 
@@ -108,9 +107,10 @@ class VirtualSplitActivationNode(VirtualSplitNode):
         self.weights = {}
         self.layer_class = activation_class
 
-        self.quantization_cfg = NodeQuantizationConfig(
-            candidates_quantization_cfg=origin_node.get_unique_activation_candidates(),
-            base_quantization_cfg=None, validate=False)
+        self.candidates_quantization_cfg = origin_node.get_unique_activation_candidates()
+        for c in self.candidates_quantization_cfg:
+            c.weights_quantization_cfg.enable_weights_quantization = False
+            c.weights_quantization_cfg.weights_n_bits = FLOAT_BITWIDTH
 
 
 class VirtualActivationWeightsNode(VirtualNode):
@@ -128,23 +128,28 @@ class VirtualActivationWeightsNode(VirtualNode):
 
     def __init__(self,
                  act_node: BaseNode,
-                 weights_node: BaseNode):
+                 weights_node: BaseNode,
+                 fw_info: FrameworkInfo):
        """
        Init a VirtualActivationWeightsNode object.

        Args:
            act_node: The original activation node.
            weights_node: The original weights node.
+            fw_info: A FrameworkInfo object with framework specific information.
        """
        # Validate weights node
+        kernel_attrs = fw_info.get_kernel_op_attributes(weights_node.type)
+        assert len(kernel_attrs) == 1 and kernel_attrs[0] is not None, f'Expected exactly one kernel attr, {kernel_attrs}'
+        kernel_attr = kernel_attrs[0]
        conf_weights = [attr for attr in weights_node.weights if weights_node.is_configurable_weight(attr)]
-        if len(conf_weights) > 1 or len(conf_weights) == 1 and not weights_node.is_configurable_weight(
+        if len(conf_weights) > 1 or len(conf_weights) == 1 and not weights_node.is_configurable_weight(kernel_attr):
            raise NotImplementedError(f'Only kernel weight can be configurable. Got configurable {conf_weights}.')

        weights = weights_node.weights.copy()
        act_node_w_rename = {}
        if act_node.weights:
-            if act_node
+            if fw_info.get_kernel_op_attributes(act_node) != DEFAULT_KERNEL_ATTRIBUTES:
                raise NotImplementedError(f'Node {act_node} with kernel cannot be used as activation for '
                                          f'VirtualActivationWeightsNode.')
            if act_node.has_any_configurable_weight():
@@ -152,7 +157,7 @@ class VirtualActivationWeightsNode(VirtualNode):
                                          'VirtualActivationWeightsNode.')
            # combine weights from activation and weights
            for w_id, w in act_node.weights.items():
-                if w_id not in weights and not (isinstance(w_id, str) and
+                if w_id not in weights and not (isinstance(w_id, str) and kernel_attr in w_id):
                    weights[w_id] = w
                    continue
                # if same identifier is used as in weight nodes (or contains the kernel substring), generate a new
@@ -180,7 +185,7 @@ class VirtualActivationWeightsNode(VirtualNode):
        self.original_weights_node = weights_node

        v_candidates = []
-        weights_candidates_quantization_cfg = weights_node.get_unique_weights_candidates(
+        weights_candidates_quantization_cfg = weights_node.get_unique_weights_candidates(kernel_attr)
        for c_a in act_node.candidates_quantization_cfg:
            for c_w in weights_candidates_quantization_cfg:
                composed_candidate = CandidateNodeQuantizationConfig(activation_quantization_cfg=c_a.activation_quantization_cfg,
@@ -198,8 +203,7 @@ class VirtualActivationWeightsNode(VirtualNode):
                v_candidates.append(composed_candidate)

        # sorting the candidates by weights number of bits first and then by activation number of bits (reversed order)
-        v_candidates.sort(key=lambda c: (c.weights_quantization_cfg.get_attr_config(
+        v_candidates.sort(key=lambda c: (c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits,
                                         c.activation_quantization_cfg.activation_n_bits), reverse=True)

-        self.quantization_cfg = NodeQuantizationConfig(candidates_quantization_cfg=v_candidates,
-                                                       base_quantization_cfg=None, validate=False)
+        self.candidates_quantization_cfg = v_candidates
model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py

@@ -37,18 +37,20 @@ def set_bit_widths(mixed_precision_enable: bool,
     """
     if mixed_precision_enable:
         assert all([len(n.candidates_quantization_cfg) > 0
-                    for n in graph.get_configurable_sorted_nodes()]), \
+                    for n in graph.get_configurable_sorted_nodes(graph.fw_info)]), \
             "All configurable nodes in graph should have at least one candidate configuration in mixed precision mode"
 
         # Get a list of nodes' names we need to finalize (that they have at least one weight qc candidate).
-        sorted_nodes_names = graph.get_configurable_sorted_nodes_names()
+        sorted_nodes_names = graph.get_configurable_sorted_nodes_names(graph.fw_info)
 
         for node in graph.nodes: # set a specific node qc for each node final qc
             # If it's reused, take the configuration that the base node has
             node_name = node.name if not node.reuse else '_'.join(node.name.split('_')[:-2])
             if node_name in sorted_nodes_names: # only configurable nodes are in this list
                 node_index_in_graph = sorted_nodes_names.index(node_name)
-                _set_node_final_qc(bit_widths_config[node_index_in_graph],
+                _set_node_final_qc(bit_widths_config[node_index_in_graph],
+                                   node,
+                                   graph.fw_info)
             else:
                 if node.is_activation_quantization_enabled():
                     # If we are here, this means that we are in weights-only mixed-precision
@@ -81,7 +83,8 @@ def set_bit_widths(mixed_precision_enable: bool,
 
 
 def _get_node_qc_by_bit_widths(node: BaseNode,
-                               node_bit_width_cfg: int
+                               node_bit_width_cfg: int,
+                               fw_info) -> Any:
     """
     Get the node's quantization configuration that
     matches to the bit width index as in the MP configuration bit_width_cfg.
@@ -90,18 +93,21 @@ def _get_node_qc_by_bit_widths(node: BaseNode,
     Args:
         node: Node to get its quantization configuration candidate.
         node_bit_width_cfg: Configuration which determines the node's desired bit width.
+        fw_info: Information relevant to a specific framework about how layers should be quantized.
 
     Returns:
         Node quantization configuration if it was found, or None otherwise.
     """
     # only the weights kernel attribute is quantized in weights mixed precision at the moment
+    kernel_attr = fw_info.get_kernel_op_attributes(node.type)
+
     if node.is_activation_quantization_enabled():
         qc = node.candidates_quantization_cfg[node_bit_width_cfg]
 
         return qc
 
-    elif
-        if node.is_weights_quantization_enabled(
+    elif kernel_attr is not None:
+        if node.is_weights_quantization_enabled(kernel_attr[0]):
             qc = node.candidates_quantization_cfg[node_bit_width_cfg]
 
             return qc
@@ -110,7 +116,8 @@ def _get_node_qc_by_bit_widths(node: BaseNode,
 
 
 def _set_node_final_qc(node_bit_width_cfg: int,
-                       node: BaseNode
+                       node: BaseNode,
+                       fw_info):
     """
     Get the node's quantization configuration that
     matches to the bit width index as in the MP configuration bit_width_cfg, and use it to finalize the node's
@@ -120,9 +127,12 @@ def _set_node_final_qc(node_bit_width_cfg: int,
     Args:
         node_bit_width_cfg: Configuration which determines the node's desired bit width.
         node: Node to set its node quantization configuration.
+        fw_info: Information relevant to a specific framework about how layers should be quantized.
 
     """
-    node_qc = _get_node_qc_by_bit_widths(node,
+    node_qc = _get_node_qc_by_bit_widths(node,
+                                         node_bit_width_cfg,
+                                         fw_info)
 
     if node_qc is None:
         Logger.critical(f'Node {node.name} quantization configuration from configuration file' # pragma: no cover