PyPI - mct-nightly - Versions diffs - 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl - Mend

mct-nightly 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mct-nightly
-Version: 2.2.0.20250114.84821
+Version: 2.2.0.20250114.134534
 Summary: A Model Compression Toolkit for neural networks
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: Apache Software License

{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=RcUjhDefodyXKympThN8mCpau5XTpxskz_QG8dlpI5U,1557
+model_compression_toolkit/__init__.py,sha256=dBTcbUHy3iim5N_8DIYbA_lq8Kp7tPhl7FelHVRgJRo,1557
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -65,18 +65,18 @@ model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_uti
 model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=AkKBP5Dm7iwz7qs5WKDB7Bm8Os-jXaMVnlkyrlw4iRY,4603
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=7iJ2YprFvm2Dk9EkXYrwO7-Sf89f537D-KrQP7XhvPs,8889
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=aAuGIzSDKIDiq07nheeWRXLEatzr6Fvoa5ZHv-2BtCI,7130
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=ig7tdmwTV_3tJLhavR223MAxQCfM0KTY4iOjQ4u7OSw,33683
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=7ROKH1bTQEoyl5yLj10NbOWLFJgJicHBBJmUT_s1xnw,32463
 model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=gsigifJ-ykWNafF4t7UMEC_-nd6YPERAk1_z0kT-Y88,27172
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=UWgxzhKWFOoESLq0TFVz0M1PhkU9d9n6wccSA3RgUxk,7903
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=89gXow5VMOsQX0SxLLoVvVDDxQd1z9b6crEWZgeWSaY,3453
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=CNujNzEECINjTe3r-m3OEqsmdzN4BkEOfIDDoua5JFc,34136
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=jvPhYhzGMKKgoqkEBMeDcOiM8wHdHxn_hM1RVFgvERw,34262
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=5sbFXgDA6mpkXXAmk6HmR5UvBnuAmkoqTHu3ah6npsY,8529
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=6LT3rZo9SlDupO-P22oG7f4sAgF_i1IYz5JxQQuMElU,10841
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=3d4SITFo6yvgPX_CBNYIL9fXsE06wX9CWHqYEFVbWAo,16969
+model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=0NUmtGiAOLl3GObr6V5L6GU19fXmp89GKDlKAKZkxwU,17176
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -523,8 +523,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.2.0.20250114.84821.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-mct_nightly-2.2.0.20250114.84821.dist-info/METADATA,sha256=0MIDeoiJZufOe3-vqXxevlyrQ2DL9AOD95QE85gPBfE,26603
-mct_nightly-2.2.0.20250114.84821.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-mct_nightly-2.2.0.20250114.84821.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.2.0.20250114.84821.dist-info/RECORD,,
+mct_nightly-2.2.0.20250114.134534.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20250114.134534.dist-info/METADATA,sha256=quvuXUrjOH_pIW_pD6rxY0fFwE7NxpduT0u1P1eolbk,26604
+mct_nightly-2.2.0.20250114.134534.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mct_nightly-2.2.0.20250114.134534.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.2.0.20250114.134534.dist-info/RECORD,,

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.2.0.20250114.084821"
+__version__ = "2.2.0.20250114.134534"

model_compression_toolkit/core/common/mixed_precision/{resource_utilization_tools/ru_methods.py → mixed_precision_ru_helper.py} RENAMED Viewed

@@ -12,14 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Set, Dict, Optional, Tuple
+from typing import List, Set, Dict, Optional, Tuple, Any
 import numpy as np
 from model_compression_toolkit.core import FrameworkInfo
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     RUTarget
@@ -44,9 +43,8 @@ class MixedPrecisionRUHelper:
     def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
         """
         Compute utilization of requested targets for a specific configuration in the format expected by LP problem
-        formulation, namely an array of ru values corresponding to graph's configurable nodes in the topological order.
-        For activation target, the array contains values for activation cuts in unspecified order (as long as it is
-        consistent between configurations).
+        formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
+        (between calls).
         Args:
             ru_targets: resource utilization targets to compute.
@@ -57,33 +55,26 @@ class MixedPrecisionRUHelper:
         """
         ru = {}
-        act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
-        w_util = None
+        act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
         if RUTarget.WEIGHTS in ru_targets:
-            w_util = self._weights_utilization(w_qcs)
-            ru[RUTarget.WEIGHTS] = np.array(list(w_util.values()))
+            wu = self._weights_utilization(w_qcs)
+            ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
-        # TODO make mp agnostic to activation method
         if RUTarget.ACTIVATION in ru_targets:
-            act_util = self._activation_maxcut_utilization(act_qcs)
-            ru[RUTarget.ACTIVATION] = np.array(list(act_util.values()))
-        # TODO use maxcut
-        if RUTarget.TOTAL in ru_targets:
-            act_tensors_util = self._activation_tensor_utilization(act_qcs)
-            w_util = w_util or self._weights_utilization(w_qcs)
-            total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
-                     # for n in self.graph.nodes if n in act_tensors_util or n in w_util}
-                     for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
-            ru[RUTarget.TOTAL] = np.array(list(total.values()))
+            au = self._activation_utilization(act_qcs)
+            ru[RUTarget.ACTIVATION] = np.array(list(au.values()))
         if RUTarget.BOPS in ru_targets:
             ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
+        if RUTarget.TOTAL in ru_targets:
+            raise ValueError('Total target should be computed based on weights and activations targets.')
+        assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
+                                            f'Requested {ru_targets}')
         return ru
-    def get_configurable_qcs(self, mp_cfg) \
+    def get_quantization_candidates(self, mp_cfg) \
             -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
         """
         Retrieve quantization candidates objects for weights and activations from the configuration list.
@@ -92,15 +83,13 @@ class MixedPrecisionRUHelper:
             mp_cfg: a list of candidates indices for configurable layers.
         Returns:
-            Mapping between nodes to weights quantization config, and a mapping between nodes and activation
+            A mapping between nodes to weights quantization config, and a mapping between nodes and activation
             quantization config.
         """
         mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
         node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
-        act_qcs = {n: node_qcs[n].activation_quantization_cfg
-                   for n in self.graph.get_activation_configurable_nodes()}
-        w_qcs = {n: node_qcs[n].weights_quantization_cfg
-                 for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
+        act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
+        w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
         return act_qcs, w_qcs
     def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
@@ -127,8 +116,8 @@ class MixedPrecisionRUHelper:
         nodes_util = {n: u.bytes for n, u in nodes_util.items()}
         return nodes_util
-    def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
-            -> Optional[Dict[Cut, float]]:
+    def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
+            -> Optional[Dict[Any, float]]:
         """
         Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
@@ -138,41 +127,17 @@ class MixedPrecisionRUHelper:
         Returns:
             Activation utilization per cut, or empty dict if no configuration was passed.
         """
-        if act_qcs:
-            _, cuts_util, _ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
-                                                                                    bitwidth_mode=BitwidthMode.QCustom,
-                                                                                    act_qcs=act_qcs)
-            cuts_util = {c: u.bytes for c, u in cuts_util.items()}
-            return cuts_util
-        # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
-        # configurable nodes.
-        return {}
-    def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
-        """
-        Compute activation tensors utilization fo configurable nodes if configuration is passed or
-        for non-configurable nodes otherwise.
+        # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
+        # covered by the computation of configurable activations.
+        if not act_qcs:
+            return {}
-        Args:
-            act_qcs: activation quantization configuration or None.
+        _, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut(
+            TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs)
+        cuts_util = {c: u.bytes for c, u in cuts_util.items()}
+        return cuts_util
-        Returns:
-            Activation utilization per node.
-        """
-        if act_qcs:
-            target_criterion = TargetInclusionCriterion.QConfigurable
-            bitwidth_mode = BitwidthMode.QCustom
-        else:
-            target_criterion = TargetInclusionCriterion.QNonConfigurable
-            bitwidth_mode = BitwidthMode.QDefaultSP
-        _, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
-                                                                                  bitwidth_mode=bitwidth_mode,
-                                                                                  act_qcs=act_qcs)
-        return {n: u.bytes for n, u in nodes_util.items()}
-    def _bops_utilization(self, mp_cfg: List[int]):
+    def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
         """
         Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
         according to the given mixed-precision configuration of a virtual graph with composed nodes.
@@ -180,15 +145,15 @@ class MixedPrecisionRUHelper:
         Args:
             mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-        Returns: A vector of node's BOPS count.
-        Note that the vector is not necessarily of the same length as the given config.
+        Returns:
+            A vector of node's BOPS count.
         """
-        # TODO keeping old implementation for now
-        # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
-        # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
+        # bops is computed for all nodes, so non-configurable memory is already covered by the computation of
+        # configurable nodes
+        if not mp_cfg:
+            return np.array([])
+        # TODO keeping old implementation for now
         virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
         mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py CHANGED Viewed

@@ -26,8 +26,8 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     RUTarget, ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
-    ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
+    TargetInclusionCriterion, BitwidthMode
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
     MixedPrecisionRUHelper
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 from model_compression_toolkit.logger import Logger
@@ -67,13 +67,19 @@ class MixedPrecisionSearchManager:
         self.compute_metric_fn = self.get_sensitivity_metric()
         self._cuts = None
-        self.ru_metrics = target_resource_utilization.get_restricted_metrics()
+        # To define RU Total constraints we need to compute weights and activations even if they have no constraints
+        # TODO currently this logic is duplicated in linear_programming.py
+        targets = target_resource_utilization.get_restricted_metrics()
+        if RUTarget.TOTAL in targets:
+            targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
+        self.ru_targets_to_compute = targets
         self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
         self.target_resource_utilization = target_resource_utilization
         self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
-        self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
-        self.non_conf_ru_dict = self._non_configurable_nodes_ru()
+        self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
+        self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
                                                                        original_graph=self.original_graph)
@@ -111,18 +117,14 @@ class MixedPrecisionSearchManager:
     def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
         """
         Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
-        The matrix is constructed as follows (for a given target):
-        - Each row represents the set of resource utilization values for a specific resource utilization
-            measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
-        - Each entry in a specific column represents the resource utilization value of a given configuration
-            (single layer is configured with specific candidate, all other layer are at the minimal resource
-            utilization configuration) for the resource utilization measure of the respective row.
+        Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
         Args:
             target: The resource target for which the resource utilization is calculated (a RUTarget value).
-        Returns: A resource utilization matrix.
+        Returns:
+            A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
+            depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
         """
         assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
@@ -132,21 +134,14 @@ class MixedPrecisionSearchManager:
         for c, c_n in enumerate(configurable_sorted_nodes):
             for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
                 if candidate_idx == self.min_ru_config[c]:
-                    # skip ru computation for min configuration. Since we compute the difference from min_ru it'll
-                    # always be 0 for all entries in the results vector.
-                    candidate_rus = np.zeros(shape=self.min_ru[target].shape)
+                    candidate_rus = self.min_ru[target]
                 else:
-                    candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) - self.min_ru[target]
+                    candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
                 ru_matrix.append(np.asarray(candidate_rus))
-        # We need to transpose the calculated ru matrix to allow later multiplication with
-        # the indicators' diagonal matrix.
-        # We only move the first axis (num of configurations) to be last,
-        # the remaining axes include the metric specific nodes (rows dimension of the new tensor)
-        # and the ru metric values (if they are non-scalars)
-        np_ru_matrix = np.array(ru_matrix)
-        return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
+        np_ru_matrix = np.array(ru_matrix) - self.min_ru[target]    # num configurations X num elements
+        return np_ru_matrix
     def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
         """
@@ -162,7 +157,6 @@ class MixedPrecisionSearchManager:
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        # TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
         return self.ru_helper.compute_utilization({target}, cfg)[target]
     @staticmethod
@@ -183,18 +177,6 @@ class MixedPrecisionSearchManager:
         updated_cfg[idx] = value
         return updated_cfg
-    def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
-        """
-        Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
-        resource utilization targets.
-        Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
-        """
-        ru_metrics = self.ru_metrics - {RUTarget.BOPS}
-        ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
-        ru[RUTarget.BOPS] = None
-        return ru
     def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
         """
         Computes the resource utilization values for a given mixed-precision configuration.
@@ -206,7 +188,7 @@ class MixedPrecisionSearchManager:
         with the given config.
         """
-        act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
+        act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
         ru = self.ru_helper.ru_calculator.compute_resource_utilization(
             target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
             w_qcs=w_qcs)

model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py CHANGED Viewed

@@ -88,7 +88,7 @@ class Utilization(NamedTuple):
         # Needed for sum (with default start_value=0).
         if other == 0:
             return self
-        return self + other
+        return self + other    # pragma: no cover
     def __gt__(self, other: 'Utilization'):
         # Needed for max. Compare by bytes.
@@ -96,7 +96,7 @@ class Utilization(NamedTuple):
     def __lt__(self, other: 'Utilization'):
         # Needed for min. Compare by bytes.
-        return self.bytes < other.bytes
+        return self.bytes < other.bytes    # pragma: no cover
 class ResourceUtilizationCalculator:
@@ -119,7 +119,21 @@ class ResourceUtilizationCalculator:
         for n in graph.nodes:
             self._act_tensors_size[n] = n.get_total_output_params()
             self._params_cnt[n] = {k: v.size for k, v in n.weights.items()}
-        self._cuts = None
+        self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None
+    @property
+    def cuts(self) -> Dict[Cut, List[BaseNode]]:
+        """ Compute if needed and return graph cuts and their memory element nodes. """
+        if self._cuts is None:
+            memory_graph = MemoryGraph(deepcopy(self.graph))
+            _, _, cuts = compute_graph_max_cut(memory_graph)
+            if cuts is None:    # pragma: no cover
+                raise RuntimeError("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
+            cuts = [cut for cut in cuts if cut.mem_elements.elements]
+            # cache cuts nodes for future use, so do not filter by target
+            self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
+                          for cut in cuts}
+        return self._cuts
     def compute_resource_utilization(self,
                                      target_criterion: TargetInclusionCriterion,
@@ -152,10 +166,10 @@ class ResourceUtilizationCalculator:
         elif w_qcs is not None:    # pragma: no cover
             raise ValueError('Weight configuration passed but no relevant metric requested.')
-        if act_qcs and not {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):    # pragma: no cover
-            raise ValueError('Activation configuration passed but no relevant metric requested.')
-        if RUTarget.ACTIVATION in ru_targets:
+        if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
             a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
+        elif act_qcs is not None:    # pragma: no cover
+            raise ValueError('Activation configuration passed but no relevant metric requested.')
         ru = ResourceUtilization()
         if RUTarget.WEIGHTS in ru_targets:
@@ -163,9 +177,7 @@ class ResourceUtilizationCalculator:
         if RUTarget.ACTIVATION in ru_targets:
             ru.activation_memory = a_total
         if RUTarget.TOTAL in ru_targets:
-            # TODO use maxcut
-            act_tensors_total, *_ = self.compute_activation_tensors_utilization(target_criterion, bitwidth_mode, act_qcs)
-            ru.total_memory = w_total + act_tensors_total
+            ru.total_memory = w_total + a_total
         if RUTarget.BOPS in ru_targets:
             ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
                                            bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)
@@ -262,12 +274,12 @@ class ResourceUtilizationCalculator:
         Returns:
             Total activation utilization of the network.
         """
-        return self.compute_cut_activation_utilization(target_criterion, bitwidth_mode, act_qcs)[0]
+        return self.compute_activation_utilization_by_cut(target_criterion, bitwidth_mode, act_qcs)[0]
-    def compute_cut_activation_utilization(self,
-                                           target_criterion: TargetInclusionCriterion,
-                                           bitwidth_mode: BitwidthMode,
-                                           act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
+    def compute_activation_utilization_by_cut(self,
+                                              target_criterion: TargetInclusionCriterion,
+                                              bitwidth_mode: BitwidthMode,
+                                              act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
             -> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
         """
         Compute graph activation cuts utilization.
@@ -292,20 +304,10 @@ class ResourceUtilizationCalculator:
         if not graph_target_nodes:
             return 0, {}, {}
-        if self._cuts is None:
-            memory_graph = MemoryGraph(deepcopy(self.graph))
-            _, _, cuts = compute_graph_max_cut(memory_graph)
-            if cuts is None:    # pragma: no cover
-                raise RuntimeError("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
-            cuts = [cut for cut in cuts if cut.mem_elements.elements]
-            # cache cuts nodes for future use, so do not filter by target
-            self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
-                          for cut in cuts}
         util_per_cut: Dict[Cut, Utilization] = {}    # type: ignore
         util_per_cut_per_node = defaultdict(dict)
-        for cut in self._cuts:
-            cut_target_nodes = [n for n in self._cuts[cut] if n in graph_target_nodes]
+        for cut in self.cuts:
+            cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
             if not cut_target_nodes:
                 continue
             for n in cut_target_nodes:
@@ -322,7 +324,7 @@ class ResourceUtilizationCalculator:
                                                bitwidth_mode: BitwidthMode,
                                                act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
                                                include_reused=False) \
-            -> Tuple[float, Dict[BaseNode, Utilization]]:
+            -> Tuple[float, Dict[BaseNode, Utilization]]:    # pragma: no cover
         """
         Compute resource utilization for graph's activations tensors.
@@ -462,7 +464,6 @@ class ResourceUtilizationCalculator:
         node_bops = a_nbits * w_nbits * node_mac
         return node_bops
-    @lru_cache
     def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]:
         """
         Retrieve target nodes from a cut filtered by a criterion.
@@ -474,7 +475,7 @@ class ResourceUtilizationCalculator:
         Returns:
             A list of target nodes from a cut.
         """
-        cut_nodes = [self.graph.find_node_by_name(e.node_name)[0] for e in cut.mem_elements.elements]
+        cut_nodes = self.cuts[cut]
         return self._get_target_activation_nodes(target_criterion, include_reused=True, nodes=cut_nodes)
     def _get_target_weight_nodes(self,
@@ -500,7 +501,7 @@ class ResourceUtilizationCalculator:
             quantized = [n for n in self.graph if n.has_any_weight_attr_to_quantize()]
             configurable = self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused)
             nodes = [n for n in quantized if n not in configurable]
-        elif target_criterion == TargetInclusionCriterion.Any:
+        elif target_criterion == TargetInclusionCriterion.Any:    # pragma: no cover
             nodes = list(self.graph.nodes)
         else:    # pragma: no cover
             raise ValueError(f'Unknown {target_criterion}.')
@@ -566,15 +567,15 @@ class ResourceUtilizationCalculator:
             Selected nodes.
         """
         nodes = nodes or self.graph.nodes
-        if target_criterion == TargetInclusionCriterion.QConfigurable:
+        if target_criterion == TargetInclusionCriterion.QConfigurable:    # pragma: no cover
             nodes = [n for n in nodes if n.has_configurable_activation()]
         elif target_criterion == TargetInclusionCriterion.AnyQuantized:
             nodes = [n for n in nodes if n.is_activation_quantization_enabled()]
-        elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
+        elif target_criterion == TargetInclusionCriterion.QNonConfigurable:    # pragma: no cover
             nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
         elif target_criterion != TargetInclusionCriterion.Any:    # pragma: no cover
             raise ValueError(f'Unknown {target_criterion}.')
-        if not include_reused:
+        if not include_reused:    # pragma: no cover
             nodes = [n for n in nodes if not n.reuse]
         return nodes
@@ -664,4 +665,4 @@ class ResourceUtilizationCalculator:
                                  f'as it {len(w_qcs)}!=1 unique candidates.')
             return w_qcs[0].weights_n_bits
-        raise ValueError(f'Unknown mode {bitwidth_mode.name}')
+        raise ValueError(f'Unknown mode {bitwidth_mode.name}')    # pragma: no cover

model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import numpy as np
 from pulp import *
 from tqdm import tqdm
-from typing import Dict, Tuple
+from typing import Dict, Tuple, Set, Any
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
@@ -167,95 +167,95 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
         indicators_arr = np.array(indicators)
         indicators_matrix = np.diag(indicators_arr)
-        for target, ru_value in target_resource_utilization.get_resource_utilization_dict().items():
-            if not np.isinf(ru_value):
-                non_conf_ru_vector = None if search_manager.non_conf_ru_dict is None \
-                    else search_manager.non_conf_ru_dict.get(target)
-                _add_set_of_ru_constraints(search_manager=search_manager,
-                                           target=target,
-                                           target_resource_utilization_value=ru_value,
-                                           indicators_matrix=indicators_matrix,
-                                           lp_problem=lp_problem,
-                                           non_conf_ru_vector=non_conf_ru_vector)
+        _add_ru_constraints(search_manager=search_manager,
+                            target_resource_utilization=target_resource_utilization,
+                            indicators_matrix=indicators_matrix,
+                            lp_problem=lp_problem,
+                            non_conf_ru_dict=search_manager.non_conf_ru_dict)
     else:  # pragma: no cover
         Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
                         "A valid 'target_resource_utilization' is required.")
     return lp_problem
-def _add_set_of_ru_constraints(search_manager: MixedPrecisionSearchManager,
-                               target: RUTarget,
-                               target_resource_utilization_value: float,
-                               indicators_matrix: np.ndarray,
-                               lp_problem: LpProblem,
-                               non_conf_ru_vector: np.ndarray):
+def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
+                        target_resource_utilization: ResourceUtilization,
+                        indicators_matrix: np.ndarray,
+                        lp_problem: LpProblem,
+                        non_conf_ru_dict: Optional[Dict[RUTarget, np.ndarray]]):
     """
-    Adding a constraint for the Lp problem for the given target resource utilization.
+    Add constraints to the Lp problem for each restricted target of the given target resource utilization.
     The update to the Lp problem object is done inplace.
     Args:
         search_manager:  MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-        target: A RUTarget.
-        target_resource_utilization_value: Target resource utilization value of the given target resource utilization
-        for which the constraint is added.
+        target_resource_utilization: Target resource utilization.
         indicators_matrix: A diagonal matrix of the Lp problem's indicators.
         lp_problem: An Lp problem object to add constraint to.
-        non_conf_ru_vector: A non-configurable nodes' resource utilization vector.
+        non_conf_ru_dict: Resource utilization vectors of the non-configurable nodes for the constrained targets.
     """
+    ru_indicated_vectors = {}
+    # targets to add constraints for
+    constraints_targets = target_resource_utilization.get_restricted_metrics()
+    # to add constraints for Total target we need to compute weight and activation
+    targets_to_compute = constraints_targets
+    if RUTarget.TOTAL in constraints_targets:
+        targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
+    for target in targets_to_compute:
+        ru_matrix = search_manager.compute_resource_utilization_matrix(target)    # num elements X num configurations
+        indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)    # num elements X num configurations
+        # Sum the indicated values over all configurations, and add the value for minimal configuration once.
+        # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory
+        # that would be required if that configuration is selected.
+        # Each element in a vector is an lp object representing the configurations sum term for a memory element.
+        ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
+        non_conf_ru_vec = non_conf_ru_dict[target]
+        if non_conf_ru_vec is not None and non_conf_ru_vec.size:
+            # add non-conf value as additional mem elements so that they get aggregated
+            ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
+        ru_indicated_vectors[target] = ru_vec
+    # add constraints only for the restricted targets in target resource utilization.
+    for target in constraints_targets:
+        target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target]
+        aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
+        for v in aggr_ru:
+            if isinstance(v, float):
+                if v > target_resource_utilization_value:
+                    Logger.critical(
+                        f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
+                        f"with the value {target_resource_utilization_value}.")  # pragma: no cover
+            else:
+                lp_problem += v <= target_resource_utilization_value
-    ru_matrix = search_manager.compute_resource_utilization_matrix(target)
-    indicated_ru_matrix = np.matmul(ru_matrix, indicators_matrix)
-    # Need to re-organize the tensor such that the configurations' axis will be second,
-    # and all metric values' axis will come afterward
-    indicated_ru_matrix = np.moveaxis(indicated_ru_matrix, source=len(indicated_ru_matrix.shape) - 1, destination=1)
-    # In order to get the result resource utilization according to a chosen set of indicators, we sum each row in
-    # the result matrix. Each row represents the resource utilization values for a specific resource utilization metric,
-    # such that only elements corresponding to a configuration which implied by the set of indicators will have some
-    # positive value different than 0 (and will contribute to the total resource utilization).
-    ru_sum_vector = np.array([
-        np.sum(indicated_ru_matrix[i], axis=0) +  # sum of metric values over all configurations in a row
-        search_manager.min_ru[target][i] for i in range(indicated_ru_matrix.shape[0])])
-    ru_vec = ru_sum_vector
-    if non_conf_ru_vector is not None and non_conf_ru_vector.size:
-        ru_vec = np.concatenate([ru_vec, non_conf_ru_vector])
-    aggr_ru = _aggregate_for_lp(ru_vec, target)
-    for v in aggr_ru:
-        if isinstance(v, float):
-            if v > target_resource_utilization_value:
-                Logger.critical(
-                    f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
-                    f"with the value {target_resource_utilization_value}.")  # pragma: no cover
-        else:
-            lp_problem += v <= target_resource_utilization_value
-def _aggregate_for_lp(ru_vec, target: RUTarget) -> list:
+def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
     """
     Aggregate resource utilization values for the LP.
     Args:
-        ru_vec: a vector of resource utilization values.
+        targets_ru_vec: resource utilization vectors for all precomputed targets.
         target: resource utilization target.
     Returns:
         Aggregated resource utilization.
     """
     if target == RUTarget.TOTAL:
-        w = lpSum(v[0] for v in ru_vec)
-        return [w + v[1] for v in ru_vec]
+        w = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
+        act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION]
+        return [w + v for v in act_ru_vec]
     if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
-        return [lpSum(ru_vec)]
+        return [lpSum(targets_ru_vec[target])]
     if target == RUTarget.ACTIVATION:
         # for max aggregation, each value constitutes a separate constraint
-        return list(ru_vec)
+        return list(targets_ru_vec[target])
-    raise ValueError(f'Unexpected target {target}.')
+    raise ValueError(f'Unexpected target {target}.')    # pragma: no cover
 def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,

{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt RENAMED Viewed

File without changes

mct-nightly 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

mct-nightly 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl