PyPI - mct-nightly - Versions diffs - 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl - Mend

mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (106) hide show

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py CHANGED Viewed

@@ -13,23 +13,24 @@
 # limitations under the License.
 # ==============================================================================
-from typing import Callable, Tuple
-from typing import Dict, List
+from typing import Callable, Dict, List
 import numpy as np
 from model_compression_toolkit.core.common import BaseNode
-from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import RuFunctions
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric, calc_graph_cuts
-from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import Cut
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
+    RUTarget, ResourceUtilization
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
+    TargetInclusionCriterion, BitwidthMode
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
+    MixedPrecisionRUHelper
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
+from model_compression_toolkit.logger import Logger
 class MixedPrecisionSearchManager:
@@ -42,7 +43,6 @@ class MixedPrecisionSearchManager:
                  fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
-                 ru_functions: Dict[RUTarget, RuFunctions],
                  target_resource_utilization: ResourceUtilization,
                  original_graph: Graph = None):
         """
@@ -53,8 +53,6 @@ class MixedPrecisionSearchManager:
             fw_impl: FrameworkImplementation object with specific framework methods implementation.
             sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
                 a bit-width configuration for the MP model.
-            ru_functions: A dictionary with pairs of (MpRuMethod, MpRuAggregationMethod) mapping a RUTarget to
-                a couple of resource utilization metric function and resource utilization aggregation function.
             target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
             original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument
                 will contain the original graph (for config reconstruction purposes).
@@ -69,29 +67,23 @@ class MixedPrecisionSearchManager:
         self.compute_metric_fn = self.get_sensitivity_metric()
         self._cuts = None
-        ru_types = [ru_target for ru_target, ru_value in
-                    target_resource_utilization.get_resource_utilization_dict().items() if ru_value < np.inf]
-        self.compute_ru_functions = {ru_target: ru_fn for ru_target, ru_fn in ru_functions.items() if ru_target in ru_types}
+        # To define RU Total constraints we need to compute weights and activations even if they have no constraints
+        # TODO currently this logic is duplicated in linear_programming.py
+        targets = target_resource_utilization.get_restricted_metrics()
+        if RUTarget.TOTAL in targets:
+            targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
+        self.ru_targets_to_compute = targets
+        self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
         self.target_resource_utilization = target_resource_utilization
         self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
-        self.min_ru = self.compute_min_ru()
-        self.non_conf_ru_dict = self._non_configurable_nodes_ru()
+        self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
+        self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
                                                                        original_graph=self.original_graph)
-    @property
-    def cuts(self) -> List[Cut]:
-        """
-        Calculates graph cuts. Written as property, so it will only be calculated once and
-        only if cuts are needed.
-        """
-        if self._cuts is None:
-            self._cuts = calc_graph_cuts(self.original_graph)
-        return self._cuts
     def get_search_space(self) -> Dict[int, List[int]]:
         """
         The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces
@@ -122,55 +114,17 @@ class MixedPrecisionSearchManager:
         return self.sensitivity_evaluator.compute_metric
-    def _calc_ru_fn(self, ru_target, ru_fn, mp_cfg) -> np.ndarray:
-        """
-        Computes a resource utilization for a certain mixed precision configuration.
-        The method computes a resource utilization vector for specific target resource utilization.
-        Returns: resource utilization value.
-        """
-        # ru_fn is a pair of resource utilization computation method and
-        # resource utilization aggregation method (in this method we only need the first one)
-        if ru_target is RUTarget.ACTIVATION:
-            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl, self.cuts)
-        else:
-            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl)
-    def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
-        """
-        Computes a resource utilization vector with the values matching to the minimal mp configuration
-        (i.e., each node is configured with the quantization candidate that would give the minimal size of the
-        node's resource utilization).
-        The method computes the minimal resource utilization vector for each target resource utilization.
-        Returns: A dictionary mapping each target resource utilization to its respective minimal
-        resource utilization values.
-        """
-        min_ru = {}
-        for ru_target, ru_fn in self.compute_ru_functions.items():
-            # ru_fns is a pair of resource utilization computation method and
-            # resource utilization aggregation method (in this method we only need the first one)
-            min_ru[ru_target] = self._calc_ru_fn(ru_target, ru_fn, self.min_ru_config)
-        return min_ru
     def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
         """
         Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
-        The matrix is constructed as follows (for a given target):
-        - Each row represents the set of resource utilization values for a specific resource utilization
-            measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
-        - Each entry in a specific column represents the resource utilization value of a given configuration
-            (single layer is configured with specific candidate, all other layer are at the minimal resource
-            utilization configuration) for the resource utilization measure of the respective row.
+        Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
         Args:
             target: The resource target for which the resource utilization is calculated (a RUTarget value).
-        Returns: A resource utilization matrix.
+        Returns:
+            A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
+            depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
         """
         assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
@@ -180,54 +134,14 @@ class MixedPrecisionSearchManager:
         for c, c_n in enumerate(configurable_sorted_nodes):
             for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
                 if candidate_idx == self.min_ru_config[c]:
-                    # skip ru computation for min configuration. Since we compute the difference from min_ru it'll
-                    # always be 0 for all entries in the results vector.
-                    candidate_rus = np.zeros(shape=self.min_ru[target].shape)
+                    candidate_rus = self.min_ru[target]
                 else:
-                    candidate_rus = self.compute_candidate_relative_ru(c, candidate_idx, target)
-                ru_matrix.append(np.asarray(candidate_rus))
-        # We need to transpose the calculated ru matrix to allow later multiplication with
-        # the indicators' diagonal matrix.
-        # We only move the first axis (num of configurations) to be last,
-        # the remaining axes include the metric specific nodes (rows dimension of the new tensor)
-        # and the ru metric values (if they are non-scalars)
-        np_ru_matrix = np.array(ru_matrix)
-        return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
-    def compute_candidate_relative_ru(self,
-                                      conf_node_idx: int,
-                                      candidate_idx: int,
-                                      target: RUTarget) -> np.ndarray:
-        """
-        Computes a resource utilization vector for a given candidates of a given configurable node,
-        i.e., the matching resource utilization vector which is obtained by computing the given target's
-        resource utilization function on a minimal configuration in which the given
-        layer's candidates is changed to the new given one.
-        The result is normalized by subtracting the target's minimal resource utilization vector.
+                    candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
-        Args:
-            conf_node_idx: The index of a node in a sorted configurable nodes list.
-            candidate_idx: The index of a node's quantization configuration candidate.
-            target: The target for which the resource utilization is calculated (a RUTarget value).
-        Returns: Normalized node's resource utilization vector
-        """
-        return self.compute_node_ru_for_candidate(conf_node_idx, candidate_idx, target) - \
-               self.get_min_target_resource_utilization(target)
-    def get_min_target_resource_utilization(self, target: RUTarget) -> np.ndarray:
-        """
-        Returns the minimal resource utilization vector (pre-calculated on initialization) of a specific target.
-        Args:
-            target: The target for which the resource utilization is calculated (a RUTarget value).
-        Returns: Minimal resource utilization vector.
+                ru_matrix.append(np.asarray(candidate_rus))
-        """
-        return self.min_ru[target]
+        np_ru_matrix = np.array(ru_matrix) - self.min_ru[target]    # num configurations X num elements
+        return np_ru_matrix
     def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
         """
@@ -243,7 +157,7 @@ class MixedPrecisionSearchManager:
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        return self._calc_ru_fn(target, self.compute_ru_functions[target], cfg)
+        return self.ru_helper.compute_utilization({target}, cfg)[target]
     @staticmethod
     def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -263,29 +177,6 @@ class MixedPrecisionSearchManager:
         updated_cfg[idx] = value
         return updated_cfg
-    def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
-        """
-        Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
-        resource utilization targets.
-        Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
-        """
-        non_conf_ru_dict = {}
-        for target, ru_fns in self.compute_ru_functions.items():
-            # Call for the ru method of the given target - empty quantization configuration list is passed since we
-            # compute for non-configurable nodes
-            if target == RUTarget.BOPS:
-                ru_vector = None
-            elif target == RUTarget.ACTIVATION:
-                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl, self.cuts)
-            else:
-                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl)
-            non_conf_ru_dict[target] = ru_vector
-        return non_conf_ru_dict
     def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
         """
         Computes the resource utilization values for a given mixed-precision configuration.
@@ -297,29 +188,11 @@ class MixedPrecisionSearchManager:
         with the given config.
         """
-        ru_dict = {}
-        for ru_target, ru_fns in self.compute_ru_functions.items():
-            # Passing False to ru methods and aggregations to indicates that the computations
-            # are not for constraints setting
-            if ru_target == RUTarget.BOPS:
-                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl, False)
-            elif ru_target == RUTarget.ACTIVATION:
-                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.graph, self.fw_info, self.fw_impl, self.cuts)
-            else:
-                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl)
-            non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
-            if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
-                ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
-            else:
-                ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(
-                    np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)
-            ru_dict[ru_target] = ru_ru[0]
-        config_ru = ResourceUtilization()
-        config_ru.set_resource_utilization_by_target(ru_dict)
-        return config_ru
+        act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
+        ru = self.ru_helper.ru_calculator.compute_resource_utilization(
+            target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
+            w_qcs=w_qcs)
+        return ru
     def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
         """

model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py CHANGED Viewed

@@ -12,29 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from dataclasses import dataclass
 from enum import Enum
-from typing import Dict, Any
+from typing import Dict, Any, Set
 import numpy as np
 class RUTarget(Enum):
     """
-    Targets for which we define Resource Utilization metrics for mixed-precision search.
-    For each target that we care to consider in a mixed-precision search, there should be defined a set of
-    resource utilization computation function, resource utilization aggregation function,
-    and resource utilization target (within a ResourceUtilization object).
-    Whenever adding a resource utilization metric to ResourceUtilization class we should add a matching target to this enum.
-    WEIGHTS - Weights memory ResourceUtilization metric.
-    ACTIVATION - Activation memory ResourceUtilization metric.
-    TOTAL - Total memory ResourceUtilization metric.
-    BOPS - Total Bit-Operations ResourceUtilization Metric.
+    Resource Utilization targets for mixed-precision search.
+    WEIGHTS - Weights memory.
+    ACTIVATION - Activation memory.
+    TOTAL - Total memory.
+    BOPS - Total Bit-Operations.
     """
     WEIGHTS = 'weights'
@@ -43,34 +35,20 @@ class RUTarget(Enum):
     BOPS = 'bops'
+@dataclass
 class ResourceUtilization:
     """
     Class to represent measurements of performance.
-    """
-    def __init__(self,
-                 weights_memory: float = np.inf,
-                 activation_memory: float = np.inf,
-                 total_memory: float = np.inf,
-                 bops: float = np.inf):
-        """
-        Args:
-            weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not).
-            activation_memory: Memory of a model's activation in bytes, according to the given activation resource utilization metric.
-            total_memory: The sum of model's activation and weights memory in bytes, according to the given total resource utilization metric.
-            bops: The total bit-operations in the model.
-        """
-        self.weights_memory = weights_memory
-        self.activation_memory = activation_memory
-        self.total_memory = total_memory
-        self.bops = bops
-    def __repr__(self):
-        return f"Weights_memory: {self.weights_memory}, " \
-               f"Activation_memory: {self.activation_memory}, " \
-               f"Total_memory: {self.total_memory}, " \
-               f"BOPS: {self.bops}"
+    weights_memory: Memory of a model's weights in bytes.
+    activation_memory: Memory of a model's activation in bytes.
+    total_memory: The sum of model's activation and weights memory in bytes.
+    bops: The total bit-operations in the model.
+    """
+    weights_memory: float = np.inf
+    activation_memory: float = np.inf
+    total_memory: float = np.inf
+    bops: float = np.inf
     def weight_restricted(self):
         return self.weights_memory < np.inf
@@ -93,34 +71,30 @@ class ResourceUtilization:
                 RUTarget.TOTAL: self.total_memory,
                 RUTarget.BOPS: self.bops}
-    def set_resource_utilization_by_target(self, ru_mapping: Dict[RUTarget, float]):
+    def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool:
         """
-        Setting a ResourceUtilization object values for each ResourceUtilization target in the given dictionary.
+        Checks whether another ResourceUtilization object satisfies the constraints defined by the current object.
         Args:
-            ru_mapping: A mapping from a RUTarget to a matching resource utilization value.
+            ru: A ResourceUtilization object to check against the current object.
+        Returns:
+            Whether all constraints are satisfied.
         """
-        self.weights_memory = ru_mapping.get(RUTarget.WEIGHTS, np.inf)
-        self.activation_memory = ru_mapping.get(RUTarget.ACTIVATION, np.inf)
-        self.total_memory = ru_mapping.get(RUTarget.TOTAL, np.inf)
-        self.bops = ru_mapping.get(RUTarget.BOPS, np.inf)
+        return bool(ru.weights_memory <= self.weights_memory and \
+                    ru.activation_memory <= self.activation_memory and \
+                    ru.total_memory <= self.total_memory and \
+                    ru.bops <= self.bops)
-    def holds_constraints(self, ru: Any) -> bool:
-        """
-        Checks whether the given ResourceUtilization object holds a set of ResourceUtilization constraints defined by
-        the current ResourceUtilization object.
+    def get_restricted_metrics(self) -> Set[RUTarget]:
+        d = self.get_resource_utilization_dict()
+        return {k for k, v in d.items() if v < np.inf}
-        Args:
-            ru: A ResourceUtilization object to check if it holds the constraints.
-        Returns: True if all the given resource utilization values are not greater than the referenced resource utilization values.
+    def is_any_restricted(self) -> bool:
+        return bool(self.get_restricted_metrics())
-        """
-        if not isinstance(ru, ResourceUtilization):
-            return False
-        return ru.weights_memory <= self.weights_memory and \
-               ru.activation_memory <= self.activation_memory and \
-               ru.total_memory <= self.total_memory and \
-               ru.bops <= self.bops
+    def __repr__(self):
+        return f"Weights_memory: {self.weights_memory}, " \
+               f"Activation_memory: {self.activation_memory}, " \
+               f"Total_memory: {self.total_memory}, " \
+               f"BOPS: {self.bops}"

mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl