PyPI - mct-nightly - Versions diffs - 1.11.0.20240320.400__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl - Mend

mct-nightly 1.11.0.20240320.400__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (155) hide show

model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py ADDED Viewed

@@ -0,0 +1,105 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import copy
+from enum import Enum
+from functools import partial
+from typing import List, Any
+import numpy as np
+from pulp import lpSum
+def sum_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[Any]:
+    """
+    Aggregates resource utilization vector to a single resource utilization measure by summing all values.
+    Args:
+        ru_vector: A vector with nodes' resource utilization values.
+        set_constraints: If True (default), the result is used for defining constraints in the LP formalization.
+            If False, the method computes the resource utilization of a given config (not for LP formalization purposes).
+    Returns: A list with an lpSum object for lp problem definition with the vector's sum.
+    """
+    if not set_constraints:
+        return [0] if len(ru_vector) == 0 else [sum(ru_vector)]
+    return [lpSum(ru_vector)]
+def max_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[float]:
+    """
+    Aggregates resource utilization vector to allow max constraint in the linear programming problem formalization.
+    In order to do so, we need to define a separate constraint on each value in the resource utilization vector,
+    to be bounded by the target resource utilization.
+    Args:
+        ru_vector: A vector with nodes' resource utilization values.
+        set_constraints: If True (default), the result is used for defining constraints in the LP formalization.
+            If False, the method computes the resource utilization of a given config (not for LP formalization purposes).
+    Returns: A list with the vector's values, to be used to define max constraint
+    in the linear programming problem formalization.
+    """
+    if not set_constraints:
+        return [0] if len(ru_vector) == 0 else [max(ru_vector)]
+    return [ru for ru in ru_vector]
+def total_ru(ru_tensor: np.ndarray, set_constraints: bool = True) -> List[float]:
+    """
+    Aggregates resource utilization vector to allow weights and activation total utilization constraint in the linear programming
+    problem formalization. In order to do so, we need to define a separate constraint on each activation memory utilization value in
+    the resource utilization vector, combined with the sum weights memory utilization.
+    Note that the given ru_tensor should contain weights and activation utilization values in each entry.
+    Args:
+        ru_tensor: A tensor with nodes' resource utilization values for weights and activation.
+        set_constraints: If True (default), the result is used for defining constraints in the LP formalization.
+            If False, the method computes the resource utilization of a given config (not for LP formalization purposes).
+    Returns: A list with lpSum objects, to be used to define total constraint
+    in the linear programming problem formalization.
+    """
+    if not set_constraints:
+        weights_ru = sum([ru[0] for ru in ru_tensor])
+        activation_ru = max([ru[1] for ru in ru_tensor])
+        return [weights_ru + activation_ru]
+    weights_ru = lpSum([ru[0] for ru in ru_tensor])
+    total_ru = [weights_ru + activation_ru for _, activation_ru in ru_tensor]
+    return total_ru
+class MpRuAggregation(Enum):
+    """
+    Defines resource utilization aggregation functions that can be used to compute final resource utilization metric.
+    The enum values can be used to call a function on a set of arguments.
+     SUM - applies the sum_ru_values function
+     MAX - applies the max_ru_values function
+     TOTAL - applies the total_ru function
+    """
+    SUM = partial(sum_ru_values)
+    MAX = partial(max_ru_values)
+    TOTAL = partial(total_ru)
+    def __call__(self, *args):
+        return self.value(*args)

model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py ADDED Viewed

@@ -0,0 +1,26 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
+# When adding a RUTarget that we want to consider in our mp search,
+# a matching pair of resource_utilization_tools computation function and a resource_utilization_tools
+# aggregation function should be added to this dictionary
+ru_functions_mapping = {RUTarget.WEIGHTS: (MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
+                        RUTarget.ACTIVATION: (MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
+                        RUTarget.TOTAL: (MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
+                        RUTarget.BOPS: (MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}

model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py} RENAMED Viewed

@@ -28,14 +28,14 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
 from model_compression_toolkit.logger import Logger
-def weights_size_kpi(mp_cfg: List[int],
-                     graph: Graph,
-                     fw_info: FrameworkInfo,
-                     fw_impl: FrameworkImplementation) -> np.ndarray:
+def weights_size_utilization(mp_cfg: List[int],
+                             graph: Graph,
+                             fw_info: FrameworkInfo,
+                             fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective weights' memory size for the given weight configurable node,
+    Computes a resource utilization vector with the respective weights' memory size for the given weight configurable node,
     according to the given mixed-precision configuration.
-    If an empty configuration is given, then computes KPI vector for non-configurable nodes.
+    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
     Args:
         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
@@ -52,7 +52,7 @@ def weights_size_kpi(mp_cfg: List[int],
     weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
     if len(mp_cfg) == 0:
-        # Computing non-configurable nodes KPI
+        # Computing non-configurable nodes resource utilization
         # TODO: when enabling multiple attribute quantization by default (currently,
         #  only kernel quantization is enabled) we should include other attributes memory in the sum of all
         #  weights memory (when quantized to their default 8-bit, non-configurable).
@@ -71,7 +71,8 @@ def weights_size_kpi(mp_cfg: List[int],
                 node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
                 weights_memory.append(node_weights_memory_in_bytes)
     else:
-        # Go over configurable all nodes that should be taken into consideration when computing the weights KPI.
+        # Go over all configurable nodes that should be taken into consideration when computing the weights
+        # resource utilization.
         for n in graph.get_sorted_weights_configurable_nodes(fw_info):
             # Only nodes with kernel op can be considered configurable
             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
@@ -86,14 +87,14 @@ def weights_size_kpi(mp_cfg: List[int],
     return np.array(weights_memory)
-def activation_output_size_kpi(mp_cfg: List[int],
-                               graph: Graph,
-                               fw_info: FrameworkInfo,
-                               fw_impl: FrameworkImplementation) -> np.ndarray:
+def activation_output_size_utilization(mp_cfg: List[int],
+                                       graph: Graph,
+                                       fw_info: FrameworkInfo,
+                                       fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective output memory size for each activation configurable node,
+    Computes a resource utilization vector with the respective output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
-    If an empty configuration is given, then computes KPI vector for non-configurable nodes.
+    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
     Args:
         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
@@ -111,7 +112,7 @@ def activation_output_size_kpi(mp_cfg: List[int],
     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
     if len(mp_cfg) == 0:
-        # Computing non-configurable nodes KPI
+        # Computing non-configurable nodes resource utilization
         for n in graph.nodes:
             non_configurable_node = n.name not in activation_mp_nodes \
                                     and n.has_activation_quantization_enabled_candidate() \
@@ -122,7 +123,7 @@ def activation_output_size_kpi(mp_cfg: List[int],
                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
                 activation_memory.append(node_activation_memory_in_bytes)
     else:
-        # Go over all nodes that should be taken into consideration when computing the weights KPI.
+        # Go over all nodes that should be taken into consideration when computing the activation memory utilization.
         for n in graph.get_sorted_activation_configurable_nodes():
             node_idx = mp_nodes.index(n.name)
             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
@@ -135,14 +136,14 @@ def activation_output_size_kpi(mp_cfg: List[int],
     return np.array(activation_memory)
-def total_weights_activation_kpi(mp_cfg: List[int],
-                                 graph: Graph,
-                                 fw_info: FrameworkInfo,
-                                 fw_impl: FrameworkImplementation) -> np.ndarray:
+def total_weights_activation_utilization(mp_cfg: List[int],
+                                         graph: Graph,
+                                         fw_info: FrameworkInfo,
+                                         fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes KPIs tensor with the respective weights size and output memory size for each activation configurable node,
+    Computes resource utilization tensor with the respective weights size and output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
-    If an empty configuration is given, then computes KPI vector for non-configurable nodes.
+    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
     Args:
         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
@@ -160,15 +161,15 @@ def total_weights_activation_kpi(mp_cfg: List[int],
     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
     if len(mp_cfg) == 0:
-        # Computing non-configurable nodes KPI
+        # Computing non-configurable nodes utilization
         for n in graph.nodes:
             non_configurable = False
             node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0
             # Non-configurable Weights
-            # TODO: currently considering only kernel attributes in weights KPI. When enabling multi-attribute
-            #  quantization we need to modify this method to count all attributes.
+            # TODO: currently considering only kernel attributes in weights memory utilization.
+            #  When enabling multi-attribute quantization we need to modify this method to count all attributes.
             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
             if kernel_attr is not None:
                 is_non_configurable_weights = n.name not in weights_mp_nodes and \
@@ -196,9 +197,9 @@ def total_weights_activation_kpi(mp_cfg: List[int],
                     np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
     else:
         # Go over all nodes that should be taken into consideration when computing the weights or
-        # activation KPI (all configurable nodes).
+        # activation memory utilization (all configurable nodes).
         for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)):
-            # TODO: currently considering only kernel attributes in weights KPI. When enabling multi-attribute
+            # TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute
             #  quantization we need to modify this method to count all attributes.
             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
@@ -222,13 +223,13 @@ def total_weights_activation_kpi(mp_cfg: List[int],
     return np.array(weights_activation_memory)
-def bops_kpi(mp_cfg: List[int],
-             graph: Graph,
-             fw_info: FrameworkInfo,
-             fw_impl: FrameworkImplementation,
-             set_constraints: bool = True) -> np.ndarray:
+def bops_utilization(mp_cfg: List[int],
+                     graph: Graph,
+                     fw_info: FrameworkInfo,
+                     fw_impl: FrameworkImplementation,
+                     set_constraints: bool = True) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective bit-operations (BOPS) count for each configurable node,
+    Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
     according to the given mixed-precision configuration of a virtual graph with composed nodes.
     Args:
@@ -236,7 +237,7 @@ def bops_kpi(mp_cfg: List[int],
         graph: Graph object.
         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
         fw_impl: FrameworkImplementation object with specific framework methods implementation.
-        set_constraints: A flag for utilizing the method for KPI computation of a
+        set_constraints: A flag for utilizing the method for resource utilization computation of a
             given config not for LP formalization purposes.
     Returns: A vector of node's BOPS count.
@@ -245,12 +246,12 @@ def bops_kpi(mp_cfg: List[int],
     """
     if not set_constraints:
-        return _bops_kpi(mp_cfg,
-                         graph,
-                         fw_info,
-                         fw_impl)
+        return _bops_utilization(mp_cfg,
+                                 graph,
+                                 fw_info,
+                                 fw_impl)
-    # BOPs KPI method considers non-configurable nodes, therefore, it doesn't need separate implementation
+    # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
     # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
     virtual_bops_nodes = [n for n in graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
@@ -261,12 +262,12 @@ def bops_kpi(mp_cfg: List[int],
     return np.array(bops)
-def _bops_kpi(mp_cfg: List[int],
-              graph: Graph,
-              fw_info: FrameworkInfo,
-              fw_impl: FrameworkImplementation) -> np.ndarray:
+def _bops_utilization(mp_cfg: List[int],
+                      graph: Graph,
+                      fw_info: FrameworkInfo,
+                      fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective bit-operations (BOPS) count for each configurable node,
+    Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
     according to the given mixed-precision configuration of an original graph.
     Args:
@@ -281,19 +282,18 @@ def _bops_kpi(mp_cfg: List[int],
     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-    # Go over all nodes that should be taken into consideration when computing the BOPS KPI.
+    # Go over all nodes that should be taken into consideration when computing the BOPS utilization.
     bops = []
     for n in graph.get_topo_sorted_nodes():
         if n.has_kernel_weight_to_quantize(fw_info):
             # If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
             incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
             if len(incoming_edges) != 1:
-                Logger.critical(f"Can't compute BOPS metric for node {n.name} with multiple inputs.")  # pragma: no cover
+                Logger.critical(f"Unable to compute BOPS metric for node {n.name} due to multiple inputs.")  # pragma: no cover
             input_activation_node = incoming_edges[0].source_node
             if len(graph.out_edges(input_activation_node)) > 1:
                 # In the case where the activation node has multiple outgoing edges
-                # we don't consider this edge in the BOPS KPI calculation
+                # we don't consider this edge in the BOPS utilization calculation
                 continue
             input_activation_node_cfg = input_activation_node.candidates_quantization_cfg[_get_node_cfg_idx(input_activation_node, mp_cfg, mp_nodes)]
@@ -338,7 +338,7 @@ def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nod
 def _get_origin_weights_node(n: BaseNode) -> BaseNode:
     """
-    In case we run a KPI computation on a virtual graph,
+    In case we run a resource utilization computation on a virtual graph,
     this method is used to retrieve the original node out of a virtual weights node,
     Args:
@@ -358,7 +358,7 @@ def _get_origin_weights_node(n: BaseNode) -> BaseNode:
 def _get_origin_activation_node(n: BaseNode) -> BaseNode:
     """
-    In case we run a KPI computation on a virtual graph,
+    In case we run a resource utilization computation on a virtual graph,
     this method is used to retrieve the original node out of a virtual activation node,
     Args:
@@ -417,25 +417,25 @@ def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float:
     return node_output_size * node_nbits / BITS_TO_BYTES
-class MpKpiMetric(Enum):
+class MpRuMetric(Enum):
     """
-    Defines kpi computation functions that can be used to compute KPI for a given target for a given mp config.
-    The enum values can be used to call a function on a set of arguments.
+    Defines resource utilization computation functions that can be used to compute the resource utilization of a given
+    target for a given mp config. The enum values can be used to call a function on a set of arguments.
-     WEIGHTS_SIZE - applies the weights_size_kpi function
+     WEIGHTS_SIZE - applies the weights_size_utilization function
-     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_kpi function
+     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
-     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_kpi function
+     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
-     BOPS_COUNT - applies the bops_kpi function
+     BOPS_COUNT - applies the bops_utilization function
     """
-    WEIGHTS_SIZE = partial(weights_size_kpi)
-    ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_kpi)
-    TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_kpi)
-    BOPS_COUNT = partial(bops_kpi)
+    WEIGHTS_SIZE = partial(weights_size_utilization)
+    ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
+    TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
+    BOPS_COUNT = partial(bops_utilization)
     def __call__(self, *args):
         return self.value(*args)

mct-nightly 1.11.0.20240320.400__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl

mct-nightly 1.11.0.20240320.400__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl