mct-nightly 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: mct-nightly
3
- Version: 2.2.0.20250114.84821
3
+ Version: 2.2.0.20250114.134534
4
4
  Summary: A Model Compression Toolkit for neural networks
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: Apache Software License
@@ -1,4 +1,4 @@
1
- model_compression_toolkit/__init__.py,sha256=RcUjhDefodyXKympThN8mCpau5XTpxskz_QG8dlpI5U,1557
1
+ model_compression_toolkit/__init__.py,sha256=dBTcbUHy3iim5N_8DIYbA_lq8Kp7tPhl7FelHVRgJRo,1557
2
2
  model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
3
3
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
4
4
  model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -65,18 +65,18 @@ model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_uti
65
65
  model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
66
66
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=AkKBP5Dm7iwz7qs5WKDB7Bm8Os-jXaMVnlkyrlw4iRY,4603
67
67
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
68
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=7iJ2YprFvm2Dk9EkXYrwO7-Sf89f537D-KrQP7XhvPs,8889
68
69
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=aAuGIzSDKIDiq07nheeWRXLEatzr6Fvoa5ZHv-2BtCI,7130
69
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=ig7tdmwTV_3tJLhavR223MAxQCfM0KTY4iOjQ4u7OSw,33683
70
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=7ROKH1bTQEoyl5yLj10NbOWLFJgJicHBBJmUT_s1xnw,32463
70
71
  model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=gsigifJ-ykWNafF4t7UMEC_-nd6YPERAk1_z0kT-Y88,27172
71
72
  model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
72
73
  model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=UWgxzhKWFOoESLq0TFVz0M1PhkU9d9n6wccSA3RgUxk,7903
73
74
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
74
75
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=89gXow5VMOsQX0SxLLoVvVDDxQd1z9b6crEWZgeWSaY,3453
75
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=CNujNzEECINjTe3r-m3OEqsmdzN4BkEOfIDDoua5JFc,34136
76
+ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=jvPhYhzGMKKgoqkEBMeDcOiM8wHdHxn_hM1RVFgvERw,34262
76
77
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=5sbFXgDA6mpkXXAmk6HmR5UvBnuAmkoqTHu3ah6npsY,8529
77
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=6LT3rZo9SlDupO-P22oG7f4sAgF_i1IYz5JxQQuMElU,10841
78
78
  model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
79
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=3d4SITFo6yvgPX_CBNYIL9fXsE06wX9CWHqYEFVbWAo,16969
79
+ model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=0NUmtGiAOLl3GObr6V5L6GU19fXmp89GKDlKAKZkxwU,17176
80
80
  model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
81
81
  model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
82
82
  model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -523,8 +523,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
523
523
  model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
524
524
  model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
525
525
  model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
526
- mct_nightly-2.2.0.20250114.84821.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
527
- mct_nightly-2.2.0.20250114.84821.dist-info/METADATA,sha256=0MIDeoiJZufOe3-vqXxevlyrQ2DL9AOD95QE85gPBfE,26603
528
- mct_nightly-2.2.0.20250114.84821.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
529
- mct_nightly-2.2.0.20250114.84821.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
530
- mct_nightly-2.2.0.20250114.84821.dist-info/RECORD,,
526
+ mct_nightly-2.2.0.20250114.134534.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
527
+ mct_nightly-2.2.0.20250114.134534.dist-info/METADATA,sha256=quvuXUrjOH_pIW_pD6rxY0fFwE7NxpduT0u1P1eolbk,26604
528
+ mct_nightly-2.2.0.20250114.134534.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
529
+ mct_nightly-2.2.0.20250114.134534.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
530
+ mct_nightly-2.2.0.20250114.134534.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
27
27
  from model_compression_toolkit import pruning
28
28
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
29
29
 
30
- __version__ = "2.2.0.20250114.084821"
30
+ __version__ = "2.2.0.20250114.134534"
@@ -12,14 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- from typing import List, Set, Dict, Optional, Tuple
15
+ from typing import List, Set, Dict, Optional, Tuple, Any
16
16
 
17
17
  import numpy as np
18
18
 
19
19
  from model_compression_toolkit.core import FrameworkInfo
20
20
  from model_compression_toolkit.core.common import Graph, BaseNode
21
21
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
22
- from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
23
22
  from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
24
23
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
25
24
  RUTarget
@@ -44,9 +43,8 @@ class MixedPrecisionRUHelper:
44
43
  def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
45
44
  """
46
45
  Compute utilization of requested targets for a specific configuration in the format expected by LP problem
47
- formulation, namely an array of ru values corresponding to graph's configurable nodes in the topological order.
48
- For activation target, the array contains values for activation cuts in unspecified order (as long as it is
49
- consistent between configurations).
46
+ formulation, namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
47
+ (between calls).
50
48
 
51
49
  Args:
52
50
  ru_targets: resource utilization targets to compute.
@@ -57,33 +55,26 @@ class MixedPrecisionRUHelper:
57
55
  """
58
56
 
59
57
  ru = {}
60
-
61
- act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
62
- w_util = None
58
+ act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
63
59
  if RUTarget.WEIGHTS in ru_targets:
64
- w_util = self._weights_utilization(w_qcs)
65
- ru[RUTarget.WEIGHTS] = np.array(list(w_util.values()))
60
+ wu = self._weights_utilization(w_qcs)
61
+ ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
66
62
 
67
- # TODO make mp agnostic to activation method
68
63
  if RUTarget.ACTIVATION in ru_targets:
69
- act_util = self._activation_maxcut_utilization(act_qcs)
70
- ru[RUTarget.ACTIVATION] = np.array(list(act_util.values()))
71
-
72
- # TODO use maxcut
73
- if RUTarget.TOTAL in ru_targets:
74
- act_tensors_util = self._activation_tensor_utilization(act_qcs)
75
- w_util = w_util or self._weights_utilization(w_qcs)
76
- total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
77
- # for n in self.graph.nodes if n in act_tensors_util or n in w_util}
78
- for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
79
- ru[RUTarget.TOTAL] = np.array(list(total.values()))
64
+ au = self._activation_utilization(act_qcs)
65
+ ru[RUTarget.ACTIVATION] = np.array(list(au.values()))
80
66
 
81
67
  if RUTarget.BOPS in ru_targets:
82
68
  ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
83
69
 
70
+ if RUTarget.TOTAL in ru_targets:
71
+ raise ValueError('Total target should be computed based on weights and activations targets.')
72
+
73
+ assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
74
+ f'Requested {ru_targets}')
84
75
  return ru
85
76
 
86
- def get_configurable_qcs(self, mp_cfg) \
77
+ def get_quantization_candidates(self, mp_cfg) \
87
78
  -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
88
79
  """
89
80
  Retrieve quantization candidates objects for weights and activations from the configuration list.
@@ -92,15 +83,13 @@ class MixedPrecisionRUHelper:
92
83
  mp_cfg: a list of candidates indices for configurable layers.
93
84
 
94
85
  Returns:
95
- Mapping between nodes to weights quantization config, and a mapping between nodes and activation
86
+ A mapping between nodes and activation quantization config, and a mapping between nodes and weights
96
87
  quantization config.
97
88
  """
98
89
  mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
99
90
  node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
100
- act_qcs = {n: node_qcs[n].activation_quantization_cfg
101
- for n in self.graph.get_activation_configurable_nodes()}
102
- w_qcs = {n: node_qcs[n].weights_quantization_cfg
103
- for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
91
+ act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
92
+ w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
104
93
  return act_qcs, w_qcs
105
94
 
106
95
  def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
@@ -127,8 +116,8 @@ class MixedPrecisionRUHelper:
127
116
  nodes_util = {n: u.bytes for n, u in nodes_util.items()}
128
117
  return nodes_util
129
118
 
130
- def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
131
- -> Optional[Dict[Cut, float]]:
119
+ def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
120
+ -> Optional[Dict[Any, float]]:
132
121
  """
133
122
  Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
134
123
 
@@ -138,41 +127,17 @@ class MixedPrecisionRUHelper:
138
127
  Returns:
139
128
  Activation utilization per cut, or empty dict if no configuration was passed.
140
129
  """
141
- if act_qcs:
142
- _, cuts_util, _ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
143
- bitwidth_mode=BitwidthMode.QCustom,
144
- act_qcs=act_qcs)
145
- cuts_util = {c: u.bytes for c, u in cuts_util.items()}
146
- return cuts_util
147
-
148
- # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
149
- # configurable nodes.
150
- return {}
151
-
152
- def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
153
- """
154
- Compute activation tensors utilization fo configurable nodes if configuration is passed or
155
- for non-configurable nodes otherwise.
130
+ # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
131
+ # covered by the computation of configurable activations.
132
+ if not act_qcs:
133
+ return {}
156
134
 
157
- Args:
158
- act_qcs: activation quantization configuration or None.
135
+ _, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut(
136
+ TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs)
137
+ cuts_util = {c: u.bytes for c, u in cuts_util.items()}
138
+ return cuts_util
159
139
 
160
- Returns:
161
- Activation utilization per node.
162
- """
163
- if act_qcs:
164
- target_criterion = TargetInclusionCriterion.QConfigurable
165
- bitwidth_mode = BitwidthMode.QCustom
166
- else:
167
- target_criterion = TargetInclusionCriterion.QNonConfigurable
168
- bitwidth_mode = BitwidthMode.QDefaultSP
169
-
170
- _, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
171
- bitwidth_mode=bitwidth_mode,
172
- act_qcs=act_qcs)
173
- return {n: u.bytes for n, u in nodes_util.items()}
174
-
175
- def _bops_utilization(self, mp_cfg: List[int]):
140
+ def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
176
141
  """
177
142
  Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
178
143
  according to the given mixed-precision configuration of a virtual graph with composed nodes.
@@ -180,15 +145,15 @@ class MixedPrecisionRUHelper:
180
145
  Args:
181
146
  mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
182
147
 
183
- Returns: A vector of node's BOPS count.
184
- Note that the vector is not necessarily of the same length as the given config.
185
-
148
+ Returns:
149
+ A vector of node's BOPS count.
186
150
  """
187
- # TODO keeping old implementation for now
188
-
189
- # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
190
- # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
151
+ # bops is computed for all nodes, so non-configurable memory is already covered by the computation of
152
+ # configurable nodes
153
+ if not mp_cfg:
154
+ return np.array([])
191
155
 
156
+ # TODO keeping old implementation for now
192
157
  virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
193
158
 
194
159
  mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)
@@ -26,8 +26,8 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
26
26
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
27
27
  RUTarget, ResourceUtilization
28
28
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
29
- ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
30
- from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
29
+ TargetInclusionCriterion, BitwidthMode
30
+ from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
31
31
  MixedPrecisionRUHelper
32
32
  from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
33
33
  from model_compression_toolkit.logger import Logger
@@ -67,13 +67,19 @@ class MixedPrecisionSearchManager:
67
67
  self.compute_metric_fn = self.get_sensitivity_metric()
68
68
  self._cuts = None
69
69
 
70
- self.ru_metrics = target_resource_utilization.get_restricted_metrics()
70
+ # To define RU Total constraints we need to compute weights and activations even if they have no constraints
71
+ # TODO currently this logic is duplicated in linear_programming.py
72
+ targets = target_resource_utilization.get_restricted_metrics()
73
+ if RUTarget.TOTAL in targets:
74
+ targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
75
+ self.ru_targets_to_compute = targets
76
+
71
77
  self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
72
78
  self.target_resource_utilization = target_resource_utilization
73
79
  self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
74
80
  self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
75
- self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
76
- self.non_conf_ru_dict = self._non_configurable_nodes_ru()
81
+ self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
82
+ self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
77
83
 
78
84
  self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
79
85
  original_graph=self.original_graph)
@@ -111,18 +117,14 @@ class MixedPrecisionSearchManager:
111
117
  def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
112
118
  """
113
119
  Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
114
- The matrix is constructed as follows (for a given target):
115
- - Each row represents the set of resource utilization values for a specific resource utilization
116
- measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
117
- - Each entry in a specific column represents the resource utilization value of a given configuration
118
- (single layer is configured with specific candidate, all other layer are at the minimal resource
119
- utilization configuration) for the resource utilization measure of the respective row.
120
+ Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
120
121
 
121
122
  Args:
122
123
  target: The resource target for which the resource utilization is calculated (a RUTarget value).
123
124
 
124
- Returns: A resource utilization matrix.
125
-
125
+ Returns:
126
+ A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
127
+ depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
126
128
  """
127
129
  assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
128
130
 
@@ -132,21 +134,14 @@ class MixedPrecisionSearchManager:
132
134
  for c, c_n in enumerate(configurable_sorted_nodes):
133
135
  for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
134
136
  if candidate_idx == self.min_ru_config[c]:
135
- # skip ru computation for min configuration. Since we compute the difference from min_ru it'll
136
- # always be 0 for all entries in the results vector.
137
- candidate_rus = np.zeros(shape=self.min_ru[target].shape)
137
+ candidate_rus = self.min_ru[target]
138
138
  else:
139
- candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) - self.min_ru[target]
139
+ candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
140
140
 
141
141
  ru_matrix.append(np.asarray(candidate_rus))
142
142
 
143
- # We need to transpose the calculated ru matrix to allow later multiplication with
144
- # the indicators' diagonal matrix.
145
- # We only move the first axis (num of configurations) to be last,
146
- # the remaining axes include the metric specific nodes (rows dimension of the new tensor)
147
- # and the ru metric values (if they are non-scalars)
148
- np_ru_matrix = np.array(ru_matrix)
149
- return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
143
+ np_ru_matrix = np.array(ru_matrix) - self.min_ru[target] # num configurations X num elements
144
+ return np_ru_matrix
150
145
 
151
146
  def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
152
147
  """
@@ -162,7 +157,6 @@ class MixedPrecisionSearchManager:
162
157
 
163
158
  """
164
159
  cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
165
- # TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
166
160
  return self.ru_helper.compute_utilization({target}, cfg)[target]
167
161
 
168
162
  @staticmethod
@@ -183,18 +177,6 @@ class MixedPrecisionSearchManager:
183
177
  updated_cfg[idx] = value
184
178
  return updated_cfg
185
179
 
186
- def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
187
- """
188
- Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
189
- resource utilization targets.
190
-
191
- Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
192
- """
193
- ru_metrics = self.ru_metrics - {RUTarget.BOPS}
194
- ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
195
- ru[RUTarget.BOPS] = None
196
- return ru
197
-
198
180
  def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
199
181
  """
200
182
  Computes the resource utilization values for a given mixed-precision configuration.
@@ -206,7 +188,7 @@ class MixedPrecisionSearchManager:
206
188
  with the given config.
207
189
 
208
190
  """
209
- act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
191
+ act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
210
192
  ru = self.ru_helper.ru_calculator.compute_resource_utilization(
211
193
  target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
212
194
  w_qcs=w_qcs)
@@ -88,7 +88,7 @@ class Utilization(NamedTuple):
88
88
  # Needed for sum (with default start_value=0).
89
89
  if other == 0:
90
90
  return self
91
- return self + other
91
+ return self + other # pragma: no cover
92
92
 
93
93
  def __gt__(self, other: 'Utilization'):
94
94
  # Needed for max. Compare by bytes.
@@ -96,7 +96,7 @@ class Utilization(NamedTuple):
96
96
 
97
97
  def __lt__(self, other: 'Utilization'):
98
98
  # Needed for min. Compare by bytes.
99
- return self.bytes < other.bytes
99
+ return self.bytes < other.bytes # pragma: no cover
100
100
 
101
101
 
102
102
  class ResourceUtilizationCalculator:
@@ -119,7 +119,21 @@ class ResourceUtilizationCalculator:
119
119
  for n in graph.nodes:
120
120
  self._act_tensors_size[n] = n.get_total_output_params()
121
121
  self._params_cnt[n] = {k: v.size for k, v in n.weights.items()}
122
- self._cuts = None
122
+ self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None
123
+
124
+ @property
125
+ def cuts(self) -> Dict[Cut, List[BaseNode]]:
126
+ """ Compute if needed and return graph cuts and their memory element nodes. """
127
+ if self._cuts is None:
128
+ memory_graph = MemoryGraph(deepcopy(self.graph))
129
+ _, _, cuts = compute_graph_max_cut(memory_graph)
130
+ if cuts is None: # pragma: no cover
131
+ raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover
132
+ cuts = [cut for cut in cuts if cut.mem_elements.elements]
133
+ # cache cuts nodes for future use, so do not filter by target
134
+ self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
135
+ for cut in cuts}
136
+ return self._cuts
123
137
 
124
138
  def compute_resource_utilization(self,
125
139
  target_criterion: TargetInclusionCriterion,
@@ -152,10 +166,10 @@ class ResourceUtilizationCalculator:
152
166
  elif w_qcs is not None: # pragma: no cover
153
167
  raise ValueError('Weight configuration passed but no relevant metric requested.')
154
168
 
155
- if act_qcs and not {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets): # pragma: no cover
156
- raise ValueError('Activation configuration passed but no relevant metric requested.')
157
- if RUTarget.ACTIVATION in ru_targets:
169
+ if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
158
170
  a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
171
+ elif act_qcs is not None: # pragma: no cover
172
+ raise ValueError('Activation configuration passed but no relevant metric requested.')
159
173
 
160
174
  ru = ResourceUtilization()
161
175
  if RUTarget.WEIGHTS in ru_targets:
@@ -163,9 +177,7 @@ class ResourceUtilizationCalculator:
163
177
  if RUTarget.ACTIVATION in ru_targets:
164
178
  ru.activation_memory = a_total
165
179
  if RUTarget.TOTAL in ru_targets:
166
- # TODO use maxcut
167
- act_tensors_total, *_ = self.compute_activation_tensors_utilization(target_criterion, bitwidth_mode, act_qcs)
168
- ru.total_memory = w_total + act_tensors_total
180
+ ru.total_memory = w_total + a_total
169
181
  if RUTarget.BOPS in ru_targets:
170
182
  ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
171
183
  bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)
@@ -262,12 +274,12 @@ class ResourceUtilizationCalculator:
262
274
  Returns:
263
275
  Total activation utilization of the network.
264
276
  """
265
- return self.compute_cut_activation_utilization(target_criterion, bitwidth_mode, act_qcs)[0]
277
+ return self.compute_activation_utilization_by_cut(target_criterion, bitwidth_mode, act_qcs)[0]
266
278
 
267
- def compute_cut_activation_utilization(self,
268
- target_criterion: TargetInclusionCriterion,
269
- bitwidth_mode: BitwidthMode,
270
- act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
279
+ def compute_activation_utilization_by_cut(self,
280
+ target_criterion: TargetInclusionCriterion,
281
+ bitwidth_mode: BitwidthMode,
282
+ act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
271
283
  -> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
272
284
  """
273
285
  Compute graph activation cuts utilization.
@@ -292,20 +304,10 @@ class ResourceUtilizationCalculator:
292
304
  if not graph_target_nodes:
293
305
  return 0, {}, {}
294
306
 
295
- if self._cuts is None:
296
- memory_graph = MemoryGraph(deepcopy(self.graph))
297
- _, _, cuts = compute_graph_max_cut(memory_graph)
298
- if cuts is None: # pragma: no cover
299
- raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover
300
- cuts = [cut for cut in cuts if cut.mem_elements.elements]
301
- # cache cuts nodes for future use, so do not filter by target
302
- self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
303
- for cut in cuts}
304
-
305
307
  util_per_cut: Dict[Cut, Utilization] = {} # type: ignore
306
308
  util_per_cut_per_node = defaultdict(dict)
307
- for cut in self._cuts:
308
- cut_target_nodes = [n for n in self._cuts[cut] if n in graph_target_nodes]
309
+ for cut in self.cuts:
310
+ cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
309
311
  if not cut_target_nodes:
310
312
  continue
311
313
  for n in cut_target_nodes:
@@ -322,7 +324,7 @@ class ResourceUtilizationCalculator:
322
324
  bitwidth_mode: BitwidthMode,
323
325
  act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
324
326
  include_reused=False) \
325
- -> Tuple[float, Dict[BaseNode, Utilization]]:
327
+ -> Tuple[float, Dict[BaseNode, Utilization]]: # pragma: no cover
326
328
  """
327
329
  Compute resource utilization for graph's activations tensors.
328
330
 
@@ -462,7 +464,6 @@ class ResourceUtilizationCalculator:
462
464
  node_bops = a_nbits * w_nbits * node_mac
463
465
  return node_bops
464
466
 
465
- @lru_cache
466
467
  def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]:
467
468
  """
468
469
  Retrieve target nodes from a cut filtered by a criterion.
@@ -474,7 +475,7 @@ class ResourceUtilizationCalculator:
474
475
  Returns:
475
476
  A list of target nodes from a cut.
476
477
  """
477
- cut_nodes = [self.graph.find_node_by_name(e.node_name)[0] for e in cut.mem_elements.elements]
478
+ cut_nodes = self.cuts[cut]
478
479
  return self._get_target_activation_nodes(target_criterion, include_reused=True, nodes=cut_nodes)
479
480
 
480
481
  def _get_target_weight_nodes(self,
@@ -500,7 +501,7 @@ class ResourceUtilizationCalculator:
500
501
  quantized = [n for n in self.graph if n.has_any_weight_attr_to_quantize()]
501
502
  configurable = self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused)
502
503
  nodes = [n for n in quantized if n not in configurable]
503
- elif target_criterion == TargetInclusionCriterion.Any:
504
+ elif target_criterion == TargetInclusionCriterion.Any: # pragma: no cover
504
505
  nodes = list(self.graph.nodes)
505
506
  else: # pragma: no cover
506
507
  raise ValueError(f'Unknown {target_criterion}.')
@@ -566,15 +567,15 @@ class ResourceUtilizationCalculator:
566
567
  Selected nodes.
567
568
  """
568
569
  nodes = nodes or self.graph.nodes
569
- if target_criterion == TargetInclusionCriterion.QConfigurable:
570
+ if target_criterion == TargetInclusionCriterion.QConfigurable: # pragma: no cover
570
571
  nodes = [n for n in nodes if n.has_configurable_activation()]
571
572
  elif target_criterion == TargetInclusionCriterion.AnyQuantized:
572
573
  nodes = [n for n in nodes if n.is_activation_quantization_enabled()]
573
- elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
574
+ elif target_criterion == TargetInclusionCriterion.QNonConfigurable: # pragma: no cover
574
575
  nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
575
576
  elif target_criterion != TargetInclusionCriterion.Any: # pragma: no cover
576
577
  raise ValueError(f'Unknown {target_criterion}.')
577
- if not include_reused:
578
+ if not include_reused: # pragma: no cover
578
579
  nodes = [n for n in nodes if not n.reuse]
579
580
  return nodes
580
581
 
@@ -664,4 +665,4 @@ class ResourceUtilizationCalculator:
664
665
  f'as it {len(w_qcs)}!=1 unique candidates.')
665
666
  return w_qcs[0].weights_n_bits
666
667
 
667
- raise ValueError(f'Unknown mode {bitwidth_mode.name}')
668
+ raise ValueError(f'Unknown mode {bitwidth_mode.name}') # pragma: no cover
@@ -16,7 +16,7 @@
16
16
  import numpy as np
17
17
  from pulp import *
18
18
  from tqdm import tqdm
19
- from typing import Dict, Tuple
19
+ from typing import Dict, Tuple, Set, Any
20
20
 
21
21
  from model_compression_toolkit.logger import Logger
22
22
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
@@ -167,95 +167,95 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
167
167
  indicators_arr = np.array(indicators)
168
168
  indicators_matrix = np.diag(indicators_arr)
169
169
 
170
- for target, ru_value in target_resource_utilization.get_resource_utilization_dict().items():
171
- if not np.isinf(ru_value):
172
- non_conf_ru_vector = None if search_manager.non_conf_ru_dict is None \
173
- else search_manager.non_conf_ru_dict.get(target)
174
- _add_set_of_ru_constraints(search_manager=search_manager,
175
- target=target,
176
- target_resource_utilization_value=ru_value,
177
- indicators_matrix=indicators_matrix,
178
- lp_problem=lp_problem,
179
- non_conf_ru_vector=non_conf_ru_vector)
170
+ _add_ru_constraints(search_manager=search_manager,
171
+ target_resource_utilization=target_resource_utilization,
172
+ indicators_matrix=indicators_matrix,
173
+ lp_problem=lp_problem,
174
+ non_conf_ru_dict=search_manager.non_conf_ru_dict)
180
175
  else: # pragma: no cover
181
176
  Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
182
177
  "A valid 'target_resource_utilization' is required.")
183
178
  return lp_problem
184
179
 
185
180
 
186
def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
                        target_resource_utilization: ResourceUtilization,
                        indicators_matrix: np.ndarray,
                        lp_problem: LpProblem,
                        non_conf_ru_dict: Optional[Dict[RUTarget, np.ndarray]]):
    """
    Add the constraints of all restricted resource utilization targets to the Lp problem.
    The update to the Lp problem object is done inplace.

    For every target that has to be computed, a vector of utilization terms (one per memory element) is built
    from the search manager's utilization matrix and the indicators. The vectors are then aggregated per
    restricted target, and each aggregated term is bounded by the target's utilization value.

    Args:
        search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
        target_resource_utilization: Target resource utilization.
        indicators_matrix: A diagonal matrix of the Lp problem's indicators.
        lp_problem: An Lp problem object to add constraint to.
        non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets.
            May be None, and a target may be absent from it; in both cases no non-configurable values are
            appended for that target.
    """
    # targets to add constraints for
    constraints_targets = target_resource_utilization.get_restricted_metrics()
    # to add constraints for Total target we need to compute weight and activation
    targets_to_compute = constraints_targets
    if RUTarget.TOTAL in constraints_targets:
        targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}

    ru_indicated_vectors = {}
    for target in targets_to_compute:
        # NOTE(review): presumably num configurations X num elements - it is transposed below so that the
        # configurations axis meets the (diagonal) indicators matrix. Confirm against
        # compute_resource_utilization_matrix.
        ru_matrix = search_manager.compute_resource_utilization_matrix(target)
        indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)  # num elements X num configurations

        # Sum the indicated values over all configurations, and add the value for minimal configuration once.
        # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra
        # memory that would be required if that configuration is selected.
        # Each element in a vector is an lp object representing the configurations sum term for a memory element.
        ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]

        # The dict is declared Optional and may lack the target, so look it up defensively instead of indexing.
        non_conf_ru_vec = None if non_conf_ru_dict is None else non_conf_ru_dict.get(target)
        if non_conf_ru_vec is not None and non_conf_ru_vec.size:
            # add non-conf value as additional mem elements so that they get aggregated
            ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
        ru_indicated_vectors[target] = ru_vec

    # add constraints only for the restricted targets in target resource utilization.
    target_ru_values = target_resource_utilization.get_resource_utilization_dict()
    for target in constraints_targets:
        target_resource_utilization_value = target_ru_values[target]
        aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
        for v in aggr_ru:
            if isinstance(v, float):
                # A plain float is compared directly (presumably a constant term with no indicator variables),
                # rather than being added to the problem as a constraint.
                if v > target_resource_utilization_value:
                    Logger.critical(
                        f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
                        f"with the value {target_resource_utilization_value}.")  # pragma: no cover
            else:
                lp_problem += v <= target_resource_utilization_value
235
-
236
def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
    """
    Build the list of aggregated resource utilization terms of a target for the LP.

    Args:
        targets_ru_vec: precomputed resource utilization vectors, keyed by target.
        target: the resource utilization target to aggregate for.

    Returns:
        A list of aggregated terms: one term per activation element for ACTIVATION and TOTAL,
        a single summed term for WEIGHTS and BOPS.
    """
    if target == RUTarget.ACTIVATION:
        # max aggregation: every element is kept as its own term
        return list(targets_ru_vec[target])

    if target == RUTarget.WEIGHTS or target == RUTarget.BOPS:
        # sum aggregation: a single term holding the sum of all elements
        return [lpSum(targets_ru_vec[target])]

    if target == RUTarget.TOTAL:
        # the summed weights term is added to each activation element
        weights_sum = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
        activation_elements = targets_ru_vec[RUTarget.ACTIVATION]
        return [weights_sum + act_elem for act_elem in activation_elements]

    raise ValueError(f'Unexpected target {target}.')  # pragma: no cover
259
259
 
260
260
 
261
261
  def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,