mct-nightly 2.4.0.20250616.616__py3-none-any.whl → 2.4.0.20250618.606__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. {mct_nightly-2.4.0.20250616.616.dist-info → mct_nightly-2.4.0.20250618.606.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.4.0.20250616.616.dist-info → mct_nightly-2.4.0.20250618.606.dist-info}/RECORD +120 -120
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/analyzer.py +2 -5
  5. model_compression_toolkit/core/common/back2framework/base_model_builder.py +0 -3
  6. model_compression_toolkit/core/common/framework_implementation.py +10 -22
  7. model_compression_toolkit/core/common/framework_info.py +105 -68
  8. model_compression_toolkit/core/common/graph/base_graph.py +15 -42
  9. model_compression_toolkit/core/common/graph/base_node.py +103 -42
  10. model_compression_toolkit/core/common/graph/functional_node.py +18 -1
  11. model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +7 -13
  12. model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +8 -18
  13. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +4 -7
  14. model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +2 -3
  15. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +2 -5
  16. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +3 -6
  17. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +5 -10
  18. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +2 -5
  19. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +4 -8
  20. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +2 -7
  21. model_compression_toolkit/core/common/model_collector.py +10 -20
  22. model_compression_toolkit/core/common/model_validation.py +1 -4
  23. model_compression_toolkit/core/common/network_editors/actions.py +14 -38
  24. model_compression_toolkit/core/common/network_editors/edit_network.py +1 -4
  25. model_compression_toolkit/core/common/pruning/channels_grouping.py +1 -5
  26. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +0 -6
  27. model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +5 -15
  28. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +3 -7
  29. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -4
  30. model_compression_toolkit/core/common/pruning/memory_calculator.py +5 -13
  31. model_compression_toolkit/core/common/pruning/prune_graph.py +1 -4
  32. model_compression_toolkit/core/common/pruning/pruner.py +1 -6
  33. model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +5 -13
  34. model_compression_toolkit/core/common/pruning/pruning_section.py +9 -18
  35. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +2 -1
  36. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +10 -12
  37. model_compression_toolkit/core/common/quantization/node_quantization_config.py +4 -3
  38. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +5 -11
  39. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +8 -22
  40. model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +1 -2
  41. model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +2 -3
  42. model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +5 -13
  43. model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +3 -9
  44. model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +3 -10
  45. model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +1 -6
  46. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +2 -3
  47. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +3 -6
  48. model_compression_toolkit/core/common/substitutions/scale_equalization.py +5 -21
  49. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -19
  50. model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -3
  51. model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
  52. model_compression_toolkit/core/common/visualization/nn_visualizer.py +3 -8
  53. model_compression_toolkit/core/common/visualization/tensorboard_writer.py +6 -8
  54. model_compression_toolkit/core/graph_prep_runner.py +2 -16
  55. model_compression_toolkit/core/keras/back2framework/float_model_builder.py +0 -4
  56. model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +0 -5
  57. model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +8 -15
  58. model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +0 -4
  59. model_compression_toolkit/core/keras/default_framework_info.py +138 -87
  60. model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +2 -7
  61. model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +0 -1
  62. model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -5
  63. model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +8 -16
  64. model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +1 -4
  65. model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +3 -13
  66. model_compression_toolkit/core/keras/keras_implementation.py +15 -35
  67. model_compression_toolkit/core/keras/keras_model_validation.py +6 -7
  68. model_compression_toolkit/core/keras/keras_node_prior_info.py +4 -13
  69. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +11 -34
  70. model_compression_toolkit/core/keras/resource_utilization_data_facade.py +0 -2
  71. model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +0 -3
  72. model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +3 -12
  73. model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +9 -16
  74. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -5
  75. model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +2 -3
  76. model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +0 -4
  77. model_compression_toolkit/core/pytorch/default_framework_info.py +100 -74
  78. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +3 -4
  79. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +4 -8
  80. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +1 -4
  81. model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +3 -12
  82. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +16 -41
  83. model_compression_toolkit/core/pytorch/pytorch_implementation.py +12 -32
  84. model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +1 -5
  85. model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
  86. model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +0 -3
  87. model_compression_toolkit/core/quantization_prep_runner.py +4 -9
  88. model_compression_toolkit/core/runner.py +5 -15
  89. model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
  90. model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
  91. model_compression_toolkit/gptq/common/gptq_graph.py +5 -11
  92. model_compression_toolkit/gptq/common/gptq_training.py +1 -8
  93. model_compression_toolkit/gptq/keras/gptq_training.py +3 -9
  94. model_compression_toolkit/gptq/keras/graph_info.py +4 -6
  95. model_compression_toolkit/gptq/keras/quantization_facade.py +5 -8
  96. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +1 -3
  97. model_compression_toolkit/gptq/pytorch/gptq_training.py +3 -9
  98. model_compression_toolkit/gptq/pytorch/graph_info.py +1 -3
  99. model_compression_toolkit/gptq/pytorch/quantization_facade.py +5 -7
  100. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +1 -3
  101. model_compression_toolkit/gptq/runner.py +1 -7
  102. model_compression_toolkit/pruning/keras/pruning_facade.py +2 -3
  103. model_compression_toolkit/pruning/pytorch/pruning_facade.py +2 -3
  104. model_compression_toolkit/ptq/keras/quantization_facade.py +5 -10
  105. model_compression_toolkit/ptq/pytorch/quantization_facade.py +4 -8
  106. model_compression_toolkit/ptq/runner.py +1 -4
  107. model_compression_toolkit/qat/common/qat_config.py +2 -6
  108. model_compression_toolkit/qat/keras/quantization_facade.py +7 -10
  109. model_compression_toolkit/qat/pytorch/quantization_facade.py +6 -10
  110. model_compression_toolkit/xquant/common/core_report_generator.py +1 -1
  111. model_compression_toolkit/xquant/common/framework_report_utils.py +0 -3
  112. model_compression_toolkit/xquant/common/model_folding_utils.py +1 -6
  113. model_compression_toolkit/xquant/common/tensorboard_utils.py +1 -4
  114. model_compression_toolkit/xquant/keras/keras_report_utils.py +3 -8
  115. model_compression_toolkit/xquant/keras/tensorboard_utils.py +0 -3
  116. model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +5 -8
  117. model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +0 -3
  118. {mct_nightly-2.4.0.20250616.616.dist-info → mct_nightly-2.4.0.20250618.606.dist-info}/WHEEL +0 -0
  119. {mct_nightly-2.4.0.20250616.616.dist-info → mct_nightly-2.4.0.20250618.606.dist-info}/licenses/LICENSE.md +0 -0
  120. {mct_nightly-2.4.0.20250616.616.dist-info → mct_nightly-2.4.0.20250618.606.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/graph/base_node.py

@@ -14,10 +14,11 @@
 # ==============================================================================
 
 import copy
-from typing import Dict, Any, Tuple, List, Type, Union
+from typing import Dict, Any, Tuple, List, Type, Union, NamedTuple
 
 import numpy as np
 
+from model_compression_toolkit.core.common.framework_info import get_fw_info, ChannelAxisMapping
 from model_compression_toolkit.constants import WEIGHTS_NBITS_ATTRIBUTE, CORRECTED_BIAS_ATTRIBUTE, \
     ACTIVATION_N_BITS_ATTRIBUTE, FP32_BYTES_PER_PARAMETER
 from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig, \
@@ -34,11 +35,21 @@ from model_compression_toolkit.target_platform_capabilities.targetplatform2frame
 WeightAttrT = Union[str, int]
 
 
+class NodeFrameworkInfo(NamedTuple):
+    """
+    Node's specific framework information.
+    """
+    channel_axis: ChannelAxisMapping
+    out_channel_axis: int
+    minmax: Tuple[float, float]
+    kernel_attr: str
+    is_kernel_op: bool
+
+
 class BaseNode:
     """
     Class to represent a node in a graph that represents the model.
     """
-
     def __init__(self,
                  name: str,
                  framework_attr: Dict[str, Any],
@@ -88,6 +99,78 @@ class BaseNode:
         self.prior_info = None
         self.has_activation = has_activation
         self.is_custom = is_custom
+        self.node_fw_info = self._get_fw_node_attrs(layer_class, framework_attr)
+
+    def _get_fw_node_attrs(self, node_type, framework_attr):
+        fw_info = get_fw_info()
+        return None if fw_info is None else NodeFrameworkInfo(
+            fw_info.get_kernel_channels(node_type),
+            fw_info.get_out_channel_axis(node_type),
+            fw_info.get_layer_min_max(node_type, framework_attr),
+            fw_info.get_kernel_op_attribute(node_type),
+            fw_info.is_kernel_op(node_type)
+        )
+
+    def _assert_fw_info_exists(self):
+        """
+        Verify NodeFrameworkInfo was initialized.
+        """
+        assert self.node_fw_info is not None, f"NodeFrameworkInfo not initialized for node {self.name}"  # pragma: no cover
+
+    @property
+    def channel_axis(self) -> ChannelAxisMapping:
+        """
+        Extract channels axis from node's NodeFrameworkInfo.
+
+        Returns:
+            Channels axis named tuple.
+        """
+        self._assert_fw_info_exists()
+        return self.node_fw_info.channel_axis
+
+    @property
+    def out_channel_axis(self) -> int:
+        """
+        Extract output channel axis from node's NodeFrameworkInfo.
+
+        Returns:
+            Output channel axis.
+        """
+        self._assert_fw_info_exists()
+        return self.node_fw_info.out_channel_axis
+
+    @property
+    def minmax(self) -> Tuple[float, float]:
+        """
+        Extract expected min-max activation values from node's NodeFrameworkInfo.
+
+        Returns:
+            A tuple of min-max values.
+        """
+        self._assert_fw_info_exists()
+        return self.node_fw_info.minmax
+
+    @property
+    def kernel_attr(self) -> str:
+        """
+        Extract kernel name from node's NodeFrameworkInfo.
+
+        Returns:
+            Kernel name.
+        """
+        self._assert_fw_info_exists()
+        return self.node_fw_info.kernel_attr
+
+    @property
+    def is_kernel_op(self) -> bool:
+        """
+        Check if kernel exists for the node.
+
+        Returns:
+            Whether the node has a kernel or not.
+        """
+        self._assert_fw_info_exists()
+        return self.node_fw_info.is_kernel_op
 
     @property
     def type(self):
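The hunks above move per-layer framework metadata off the global FrameworkInfo object and cache it on each node as a NodeFrameworkInfo named tuple, built once in __init__ via get_fw_info(). A minimal sketch of the resulting access pattern, assuming `node` is some BaseNode constructed while the framework info is registered (otherwise the properties trip the assert in _assert_fw_info_exists):

    # Hypothetical `node`; the property names are the ones added in this diff.
    if node.is_kernel_op:
        # Replaces the old global lookup fw_info.get_kernel_op_attributes(node.type)[0]
        kernel = node.get_weights_by_keys(node.kernel_attr)
        out_axis = node.out_channel_axis    # output-channel axis for this layer type
    a_min, a_max = node.minmax              # expected activation range (Tuple[float, float])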
@@ -298,14 +381,11 @@
 
         return input_tensors
 
-    def get_num_parameters(self, fw_info) -> Tuple[int,int]:
+    def get_num_parameters(self) -> Tuple[int,int]:
         """
         Compute the number of parameters the node holds.
         It returns a tuple: Number of quantized parameters, number of float parameters.
 
-        Args:
-            fw_info: Framework info to decide which attributes should be quantized.
-
         Returns:
             A tuple of (Number of quantized parameters, number of float parameters).
 
@@ -314,11 +394,10 @@
 
         q_node_num_params = 0
 
-        for attr in fw_info.get_kernel_op_attributes(self.type):
-            if attr is not None:
-                w = self.get_weights_by_keys(attr)
-                if w is not None:
-                    q_node_num_params += w.flatten().shape[0]
+        if self.kernel_attr is not None:
+            w = self.get_weights_by_keys(self.kernel_attr)
+            if w is not None:
+                q_node_num_params += w.flatten().shape[0]
 
         f_node_num_params = total_node_params - q_node_num_params
 
@@ -326,22 +405,19 @@
         assert int(f_node_num_params) == f_node_num_params
         return int(q_node_num_params), int(f_node_num_params)
 
-    def get_memory_bytes(self, fw_info) -> float:
+    def get_memory_bytes(self) -> float:
         """
         Compute the number of bytes the node's memory requires.
 
-        Args:
-            fw_info: Framework info to decide which attributes should be quantized.
-
         Returns: Number of bytes the node's memory requires.
 
         """
         # TODO: this method is used for tensorboard only. If we want to enable logging of other attributes memory
         # then it needs to be modified. But, it might be better to remove this method from the BaseNode completely.
-        kernel_attr = fw_info.get_kernel_op_attributes(self.type)[0]
+        kernel_attr = self.kernel_attr
         if kernel_attr is None:
             return 0
-        q_params, f_params = self.get_num_parameters(fw_info)
+        q_params, f_params = self.get_num_parameters()
         if self.final_weights_quantization_cfg is None:  # float coefficients
             memory = (f_params+q_params) * FP32_BYTES_PER_PARAMETER
         else:
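A worked instance of the float branch above, with illustrative numbers (FP32_BYTES_PER_PARAMETER is presumably 4, one float32 being 4 bytes): a node whose only weight is a 3x3x3x16 kernel holds 432 parameters, all attributed to the kernel by get_num_parameters(), so with no final weights quantization config:

    q_params, f_params = node.get_num_parameters()   # (432, 0) for the kernel-only node
    memory = (f_params + q_params) * 4               # 432 * 4 = 1728 bytes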
@@ -351,15 +427,12 @@
 
         return memory
 
-    def get_unified_weights_candidates_dict(self, fw_info) -> Dict[str, Any]:
+    def get_unified_weights_candidates_dict(self) -> Dict[str, Any]:
         """
         In Mixed-Precision, a node's kernel can have multiple candidates for weights quantization configuration.
         In order to display a single view of a node (for example, for logging in TensorBoard) we need a way
         to create a single dictionary from all candidates.
-        This method is aimed to build such an unified dictionary for a node.
-
-        Args:
-            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
+        This method is aimed to build such a unified dictionary for a node.
 
         Returns: A dictionary containing information from node's weight quantization configuration candidates.
 
@@ -369,7 +442,7 @@
         # We assume that only the kernel attribute have more than one candidate, since we only allow to
         # quantize the kernel using mixed precision
         # TODO: need to modify if we want to present a unified config for other attributes
-        kernel_attr = fw_info.get_kernel_op_attributes(self.type)[0]
+        kernel_attr = self.kernel_attr
         if kernel_attr is None:
             # This node doesn't have a kernel attribute
             return {}
@@ -437,20 +510,13 @@
         candidates = self.get_all_weights_attr_candidates(attr)
         return all(candidate == candidates[0] for candidate in candidates[1:])
 
-    def has_kernel_weight_to_quantize(self, fw_info):
+    def has_kernel_weight_to_quantize(self):
         """
-        Checks whether the node has kernel attribute that need to be quantized according to the framework info.
+        Checks whether the node has kernel attribute that need to be quantized according to the node's framework info.
 
-        Args:
-            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-
-        Returns: Whether the node has weights that need to be quantized.
+        Returns: Whether the node's kernel need to be quantized.
         """
-        attrs = fw_info.get_kernel_op_attributes(self.type)
-        for attr in attrs:
-            if attr and self.get_weights_by_keys(attr) is not None:
-                return True
-        return False
+        return self.kernel_attr and self.get_weights_by_keys(self.kernel_attr) is not None
 
     def has_any_weight_attr_to_quantize(self) -> bool:
         """
@@ -724,7 +790,7 @@ class BaseNode:
             Logger.critical(f"SIMD is expected to be a non-positive integer but found: {_simd}")
         return _simd
 
-    def sort_node_candidates(self, fw_info):
+    def sort_node_candidates(self):
         """
         Sorts the node candidates.
         We assume that the candidates are ordered in the following way (for mixed precision purposes):
@@ -733,16 +799,11 @@
         - If the node doesn't have a kernel we only consider the candidate activation number of bits to sort
         the candidates in descending order.
         The operation is done inplace.
-
-        Args:
-            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-
         """
         if self.candidates_quantization_cfg is not None:
-            kernel_attr = fw_info.get_kernel_op_attributes(self.type)[0]
-            if kernel_attr is not None:
+            if self.kernel_attr is not None:
                 self.candidates_quantization_cfg.sort(
-                    key=lambda c: (c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits,
+                    key=lambda c: (c.weights_quantization_cfg.get_attr_config(self.kernel_attr).weights_n_bits,
                                    c.activation_quantization_cfg.activation_n_bits), reverse=True)
             else:
                 self.candidates_quantization_cfg.sort(key=lambda c: c.activation_quantization_cfg.activation_n_bits,
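The sort key above orders candidates by kernel bit-width first, then activation bit-width, both descending, so the maximal-precision candidate lands at index 0. A self-contained illustration with bare (weights_n_bits, activation_n_bits) pairs standing in for candidate configs:

    candidates = [(4, 8), (8, 4), (2, 8), (8, 8)]
    candidates.sort(key=lambda c: (c[0], c[1]), reverse=True)
    assert candidates == [(8, 8), (8, 4), (4, 8), (2, 8)]  # max precision first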
model_compression_toolkit/core/common/graph/functional_node.py

@@ -1,6 +1,21 @@
+# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
 from typing import Dict, Any, Tuple, Type, List, Union
 
-from model_compression_toolkit.verify_packages import FOUND_TF
+from model_compression_toolkit.core.common.framework_info import get_fw_info
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 import numpy as np
 
@@ -45,6 +60,7 @@ class FunctionalNode(BaseNode):
             inputs_as_list: Whether to pass the node its input tensors as a list or not when calling the layer.
             has_activation: Whether the node has activations that we might want to quantize.
             tensor_input_allocs: A list of indices and strings for allocations input tensors in the node's args and kwargs.
+
         """
 
         super().__init__(name,
@@ -63,6 +79,7 @@ class FunctionalNode(BaseNode):
         self.op_call_args = list(op_call_args)
         self.functional_op = functional_op
         self.tensor_input_allocs = [] if tensor_input_allocs is None else tensor_input_allocs
+        self.node_fw_info = self._get_fw_node_attrs(functional_op, framework_attr)
 
     @property
     def type(self):
model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py

@@ -15,14 +15,13 @@
 import abc
 import uuid
 
-from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.constants import VIRTUAL_ACTIVATION_WEIGHTS_NODE_PREFIX, \
     VIRTUAL_WEIGHTS_SUFFIX, VIRTUAL_ACTIVATION_SUFFIX, FLOAT_BITWIDTH
-from model_compression_toolkit.core.common.framework_info import DEFAULT_KERNEL_ATTRIBUTES
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
 from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
+from model_compression_toolkit.core.common.framework_info import DEFAULT_KERNEL_ATTRIBUTE
 
 
 class VirtualNode(BaseNode, abc.ABC):
@@ -128,28 +127,23 @@ class VirtualActivationWeightsNode(VirtualNode):
 
     def __init__(self,
                  act_node: BaseNode,
-                 weights_node: BaseNode,
-                 fw_info: FrameworkInfo):
+                 weights_node: BaseNode):
         """
         Init a VirtualActivationWeightsNode object.
 
         Args:
            act_node: The original activation node.
            weights_node: The original weights node.
-           fw_info: A FrameworkInfo object with framework specific information.
         """
         # Validate weights node
-        kernel_attrs = fw_info.get_kernel_op_attributes(weights_node.type)
-        assert len(kernel_attrs) == 1 and kernel_attrs[0] is not None, f'Expected exactly one kernel attr, {kernel_attrs}'
-        kernel_attr = kernel_attrs[0]
         conf_weights = [attr for attr in weights_node.weights if weights_node.is_configurable_weight(attr)]
-        if len(conf_weights) > 1 or len(conf_weights) == 1 and not weights_node.is_configurable_weight(kernel_attr):
+        if len(conf_weights) > 1 or len(conf_weights) == 1 and not weights_node.is_configurable_weight(weights_node.kernel_attr):
             raise NotImplementedError(f'Only kernel weight can be configurable. Got configurable {conf_weights}.')
 
         weights = weights_node.weights.copy()
         act_node_w_rename = {}
         if act_node.weights:
-            if fw_info.get_kernel_op_attributes(act_node) != DEFAULT_KERNEL_ATTRIBUTES:
+            if act_node.kernel_attr != DEFAULT_KERNEL_ATTRIBUTE:
                 raise NotImplementedError(f'Node {act_node} with kernel cannot be used as activation for '
                                           f'VirtualActivationWeightsNode.')
             if act_node.has_any_configurable_weight():
@@ -157,7 +151,7 @@ class VirtualActivationWeightsNode(VirtualNode):
                                           'VirtualActivationWeightsNode.')
         # combine weights from activation and weights
         for w_id, w in act_node.weights.items():
-            if w_id not in weights and not (isinstance(w_id, str) and kernel_attr in w_id):
+            if w_id not in weights and not (isinstance(w_id, str) and weights_node.kernel_attr in w_id):
                 weights[w_id] = w
                 continue
             # if same identifier is used as in weight nodes (or contains the kernel substring), generate a new
@@ -185,7 +179,7 @@ class VirtualActivationWeightsNode(VirtualNode):
         self.original_weights_node = weights_node
 
         v_candidates = []
-        weights_candidates_quantization_cfg = weights_node.get_unique_weights_candidates(kernel_attr)
+        weights_candidates_quantization_cfg = weights_node.get_unique_weights_candidates(weights_node.kernel_attr)
        for c_a in act_node.candidates_quantization_cfg:
            for c_w in weights_candidates_quantization_cfg:
                composed_candidate = CandidateNodeQuantizationConfig(activation_quantization_cfg=c_a.activation_quantization_cfg,
@@ -203,7 +197,7 @@ class VirtualActivationWeightsNode(VirtualNode):
                 v_candidates.append(composed_candidate)
 
         # sorting the candidates by weights number of bits first and then by activation number of bits (reversed order)
-        v_candidates.sort(key=lambda c: (c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits,
+        v_candidates.sort(key=lambda c: (c.weights_quantization_cfg.get_attr_config(weights_node.kernel_attr).weights_n_bits,
                                          c.activation_quantization_cfg.activation_n_bits), reverse=True)
 
         self.candidates_quantization_cfg = v_candidates
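For callers, the net effect of these hunks is a narrower constructor: the kernel attribute is now resolved from weights_node.kernel_attr instead of being looked up through a FrameworkInfo argument. A migration sketch (act_node and weights_node assumed to exist):

    # Before (2.4.0.20250616.616):
    #   v_node = VirtualActivationWeightsNode(act_node, weights_node, fw_info)
    # After (2.4.0.20250618.606):
    v_node = VirtualActivationWeightsNode(act_node, weights_node)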
model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py

@@ -37,20 +37,18 @@ def set_bit_widths(mixed_precision_enable: bool,
     """
     if mixed_precision_enable:
         assert all([len(n.candidates_quantization_cfg) > 0
-                    for n in graph.get_configurable_sorted_nodes(graph.fw_info)]), \
+                    for n in graph.get_configurable_sorted_nodes()]), \
             "All configurable nodes in graph should have at least one candidate configuration in mixed precision mode"
 
         # Get a list of nodes' names we need to finalize (that they have at least one weight qc candidate).
-        sorted_nodes_names = graph.get_configurable_sorted_nodes_names(graph.fw_info)
+        sorted_nodes_names = graph.get_configurable_sorted_nodes_names()
 
         for node in graph.nodes:  # set a specific node qc for each node final qc
             # If it's reused, take the configuration that the base node has
             node_name = node.name if not node.reuse else '_'.join(node.name.split('_')[:-2])
             if node_name in sorted_nodes_names:  # only configurable nodes are in this list
                 node_index_in_graph = sorted_nodes_names.index(node_name)
-                _set_node_final_qc(bit_widths_config[node_index_in_graph],
-                                   node,
-                                   graph.fw_info)
+                _set_node_final_qc(bit_widths_config[node_index_in_graph], node)
             else:
                 if node.is_activation_quantization_enabled():
                     # If we are here, this means that we are in weights-only mixed-precision
@@ -83,8 +81,7 @@ def set_bit_widths(mixed_precision_enable: bool,
 
 
 def _get_node_qc_by_bit_widths(node: BaseNode,
-                               node_bit_width_cfg: int,
-                               fw_info) -> Any:
+                               node_bit_width_cfg: int) -> Any:
     """
     Get the node's quantization configuration that
     matches to the bit width index as in the MP configuration bit_width_cfg.
@@ -93,21 +90,18 @@ def _get_node_qc_by_bit_widths(node: BaseNode,
     Args:
         node: Node to get its quantization configuration candidate.
         node_bit_width_cfg: Configuration which determines the node's desired bit width.
-        fw_info: Information relevant to a specific framework about how layers should be quantized.
 
     Returns:
         Node quantization configuration if it was found, or None otherwise.
     """
     # only the weights kernel attribute is quantized in weights mixed precision at the moment
-    kernel_attr = fw_info.get_kernel_op_attributes(node.type)
-
     if node.is_activation_quantization_enabled():
         qc = node.candidates_quantization_cfg[node_bit_width_cfg]
 
         return qc
 
-    elif kernel_attr is not None:
-        if node.is_weights_quantization_enabled(kernel_attr[0]):
+    elif node.kernel_attr is not None:
+        if node.is_weights_quantization_enabled(node.kernel_attr):
             qc = node.candidates_quantization_cfg[node_bit_width_cfg]
 
             return qc
@@ -116,8 +110,7 @@
 
 
 def _set_node_final_qc(node_bit_width_cfg: int,
-                       node: BaseNode,
-                       fw_info):
+                       node: BaseNode):
     """
     Get the node's quantization configuration that
     matches to the bit width index as in the MP configuration bit_width_cfg, and use it to finalize the node's
@@ -127,12 +120,9 @@
     Args:
         node_bit_width_cfg: Configuration which determines the node's desired bit width.
         node: Node to set its node quantization configuration.
-        fw_info: Information relevant to a specific framework about how layers should be quantized.
 
     """
-    node_qc = _get_node_qc_by_bit_widths(node,
-                                         node_bit_width_cfg,
-                                         fw_info)
+    node_qc = _get_node_qc_by_bit_widths(node, node_bit_width_cfg)
 
     if node_qc is None:
         Logger.critical(f'Node {node.name} quantization configuration from configuration file'  # pragma: no cover
model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py

@@ -22,7 +22,6 @@ from model_compression_toolkit.target_platform_capabilities.targetplatform2frame
 
 def filter_candidates_for_mixed_precision(graph: Graph,
                                           target_resource_utilization: ResourceUtilization,
-                                          fw_info: FrameworkInfo,
                                           fqc: FrameworkQuantizationCapabilities):
     """
     Filters out candidates in case of mixed precision search for only weights or activation compression.
@@ -35,7 +34,6 @@ def filter_candidates_for_mixed_precision(graph: Graph,
     Args:
         graph: A graph representation of the model to be quantized.
         target_resource_utilization: The resource utilization of the target device.
-        fw_info: fw_info: Information needed for quantization about the specific framework.
         fqc: FrameworkQuantizationCapabilities object that describes the desired inference target platform.
 
     """
@@ -59,11 +57,10 @@ def filter_candidates_for_mixed_precision(graph: Graph,
     elif tru.activation_restricted() and not tru.weight_restricted():
         # Running mixed precision for activation compression only -
         # filter out candidates weights only configurable node
-        weight_configurable_nodes = [n for n in graph.get_weights_configurable_nodes(fw_info)]
+        weight_configurable_nodes = [n for n in graph.get_weights_configurable_nodes()]
         for n in weight_configurable_nodes:
-            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-            base_cfg_nbits = n.get_qco(fqc).base_config.attr_weights_configs_mapping[kernel_attr].weights_n_bits
+            base_cfg_nbits = n.get_qco(fqc).base_config.attr_weights_configs_mapping[n.kernel_attr].weights_n_bits
             filtered_conf = [c for c in n.candidates_quantization_cfg if
-                             c.weights_quantization_cfg.get_attr_config(kernel_attr).enable_weights_quantization and
-                             c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits == base_cfg_nbits]
+                             c.weights_quantization_cfg.get_attr_config(n.kernel_attr).enable_weights_quantization and
+                             c.weights_quantization_cfg.get_attr_config(n.kernel_attr).weights_n_bits == base_cfg_nbits]
             n.candidates_quantization_cfg = filtered_conf
model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py

@@ -30,11 +30,10 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config
 class MixedPrecisionRUHelper:
     """ Helper class for resource utilization computations for mixed precision optimization. """
 
-    def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation):
+    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation):
         self.graph = graph
-        self.fw_info = fw_info
         self.fw_impl = fw_impl
-        self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
+        self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl)
 
     def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Dict[BaseNode, int]) -> Dict[RUTarget, np.ndarray]:
         """
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py

@@ -35,7 +35,6 @@ class BitWidthSearchMethod(Enum):
 
 
 def search_bit_width(graph: Graph,
-                     fw_info: FrameworkInfo,
                      fw_impl: FrameworkImplementation,
                      target_resource_utilization: ResourceUtilization,
                      mp_config: MixedPrecisionQuantizationConfig,
@@ -52,7 +51,6 @@ def search_bit_width(graph: Graph,
 
     Args:
         graph: Graph to search a MP configuration for.
-        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
         fw_impl: FrameworkImplementation object with specific framework methods implementation.
         target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
         mp_config: Mixed-precision quantization configuration.
@@ -79,7 +77,7 @@ def search_bit_width(graph: Graph,
 
     # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
     # even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
-    se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info,
+    se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen,
                                fw_impl=fw_impl, disable_activation_for_metric=disable_activation_for_metric,
                                hessian_info_service=hessian_info_service)
 
@@ -93,7 +91,6 @@ def search_bit_width(graph: Graph,
 
     # Search manager and LP are highly coupled, so LP search method was moved inside search manager.
     search_manager = MixedPrecisionSearchManager(graph,
-                                                 fw_info=fw_info,
                                                  fw_impl=fw_impl,
                                                  sensitivity_evaluator=se,
                                                  target_resource_utilization=target_resource_utilization,
@@ -105,6 +102,6 @@ def search_bit_width(graph: Graph,
     if mp_config.refine_mp_solution:
         nodes_bit_cfg = greedy_solution_refinement_procedure(nodes_bit_cfg, search_manager, target_resource_utilization)
 
-    topo_bit_cfg = [nodes_bit_cfg[n] for n in graph.get_configurable_sorted_nodes(fw_info)]
+    topo_bit_cfg = [nodes_bit_cfg[n] for n in graph.get_configurable_sorted_nodes()]
     assert len(topo_bit_cfg) == len(nodes_bit_cfg)
     return topo_bit_cfg
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py

@@ -53,7 +53,6 @@ class MixedPrecisionSearchManager:
 
     def __init__(self,
                  graph: Graph,
-                 fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
                  target_resource_utilization: ResourceUtilization,
@@ -62,14 +61,12 @@
 
         Args:
             graph: Graph to search for its MP configuration.
-            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
             fw_impl: FrameworkImplementation object with specific framework methods implementation.
             sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
                 a bit-width configuration for the MP model.
             target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
         """
 
-        self.fw_info = fw_info
         self.fw_impl = fw_impl
 
         self.original_graph = graph
@@ -81,12 +78,12 @@
         self.target_resource_utilization = target_resource_utilization
         self.mp_config = mp_config
 
-        self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info)
+        self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes()
 
         self.ru_targets = target_resource_utilization.get_restricted_targets()
-        self.orig_graph_ru_helper = MixedPrecisionRUHelper(self.original_graph, fw_info, fw_impl)
+        self.orig_graph_ru_helper = MixedPrecisionRUHelper(self.original_graph, fw_impl)
 
-        self.min_ru_config: Dict[BaseNode, int] = self.mp_graph.get_min_candidates_config(fw_info)
+        self.min_ru_config: Dict[BaseNode, int] = self.mp_graph.get_min_candidates_config()
 
         self.config_reconstructor = None
         orig_min_config = self.min_ru_config
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py

@@ -124,10 +124,9 @@
     unexpected_qc_error = 'Custom quantization configuration is not expected for non-custom bit mode.'
     unexpected_qc_nodes_error = 'Custom quantization configuration contains unexpected node names.'
 
-    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo):
+    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation):
         self.graph = graph
         self.fw_impl = fw_impl
-        self.fw_info = fw_info
 
         # Currently we go over the full graph even if utilization won't be requested for all nodes.
         # We could fill the cache on the fly only for requested nodes, but it's probably negligible.
@@ -544,14 +543,10 @@
             self._validate_custom_qcs(w_qc, bitwidth_mode)
 
         # check if the node has kernel
-        kernel_attrs = self.fw_info.get_kernel_op_attributes(n.type)
-        if len(kernel_attrs) > 1:  # pragma: no cover
-            raise NotImplementedError('Multiple kernel attributes are not supported for BOPS computation.')
-        if not kernel_attrs or not kernel_attrs[0]:
+        if not n.kernel_attr:
            return 0
 
-        kernel_attr = kernel_attrs[0]
-        node_mac = self.fw_impl.get_node_mac_operations(n, self.fw_info)
+        node_mac = self.fw_impl.get_node_mac_operations(n)
         if node_mac == 0:
             return node_mac
 
@@ -559,12 +554,12 @@
         assert len(prev_nodes) == 1, f'Weights node is expected to have exactly one input, {n} has {len(prev_nodes)}'
         a_node = prev_nodes[0]
         if (target_criterion == TargetInclusionCriterion.AnyQuantized and
-                not (a_node.is_activation_quantization_enabled() or n.is_weights_quantization_enabled(kernel_attr))):
+                not (a_node.is_activation_quantization_enabled() or n.is_weights_quantization_enabled(n.kernel_attr))):
             return 0
 
         act_qc = self._extract_qc(a_node, act_qcs)
         a_nbits = self._get_activation_nbits(a_node, bitwidth_mode, act_qc)
-        w_nbits = self._get_weight_nbits(n, kernel_attr, bitwidth_mode, w_qc)
+        w_nbits = self._get_weight_nbits(n, n.kernel_attr, bitwidth_mode, w_qc)
         node_bops = a_nbits * w_nbits * node_mac
         return node_bops
 
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py

@@ -15,7 +15,7 @@
 import copy
 from typing import Callable, Any
 
-from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
+from model_compression_toolkit.core import ResourceUtilization, CoreConfig, QuantizationErrorMethod
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
     ResourceUtilizationCalculator, BitwidthMode, TargetInclusionCriterion
@@ -27,7 +27,6 @@ def compute_resource_utilization_data(in_model: Any,
                                       representative_data_gen: Callable,
                                       core_config: CoreConfig,
                                       fqc: FrameworkQuantizationCapabilities,
-                                      fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation) -> ResourceUtilization:
     """
     Compute Resource Utilization of a model with the default single precision quantization.
@@ -39,7 +38,6 @@ def compute_resource_utilization_data(in_model: Any,
         core_config: CoreConfig containing parameters of how the model should be quantized.
         fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
             the attached framework operator's information.
-        fw_info: Information needed for quantization about the specific framework.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
@@ -55,12 +53,11 @@
     transformed_graph = graph_preparation_runner(in_model,
                                                  representative_data_gen=representative_data_gen,
                                                  quantization_config=core_config.quantization_config,
-                                                 fw_info=fw_info,
                                                  fw_impl=fw_impl,
                                                  fqc=fqc,
                                                  bit_width_config=core_config.bit_width_config,
                                                  mixed_precision_enable=False,
                                                  running_gptq=False)
 
-    ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
+    ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl)
    return ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantizedNonFused, BitwidthMode.QDefaultSP)
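The same migration shape runs through all of these hunks: fw_info disappears from constructors and call sites, since each node now carries its own framework info. A hedged end-to-end sketch of the updated entry points in the last two files (model, rep_data_gen, fqc, fw_impl, and graph are assumed to be prepared as before):

    ru_calculator = ResourceUtilizationCalculator(graph, fw_impl)   # was (graph, fw_impl, fw_info)
    ru = compute_resource_utilization_data(in_model=model,
                                           representative_data_gen=rep_data_gen,
                                           core_config=CoreConfig(),
                                           fqc=fqc,
                                           fw_impl=fw_impl)         # fw_info argument removed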