mct-nightly 2.3.0.20250323.559__py3-none-any.whl → 2.3.0.20250325.524__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (17)
  1. {mct_nightly-2.3.0.20250323.559.dist-info → mct_nightly-2.3.0.20250325.524.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.3.0.20250323.559.dist-info → mct_nightly-2.3.0.20250325.524.dist-info}/RECORD +17 -17
  3. {mct_nightly-2.3.0.20250323.559.dist-info → mct_nightly-2.3.0.20250325.524.dist-info}/WHEEL +1 -1
  4. model_compression_toolkit/__init__.py +1 -1
  5. model_compression_toolkit/core/common/graph/base_graph.py +14 -4
  6. model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +32 -96
  7. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +17 -42
  8. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +179 -60
  9. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +22 -10
  10. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +1 -5
  11. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +14 -94
  12. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +132 -312
  13. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +1 -1
  14. model_compression_toolkit/core/pytorch/reader/graph_builders.py +2 -0
  15. model_compression_toolkit/core/runner.py +2 -12
  16. {mct_nightly-2.3.0.20250323.559.dist-info → mct_nightly-2.3.0.20250325.524.dist-info}/licenses/LICENSE.md +0 -0
  17. {mct_nightly-2.3.0.20250323.559.dist-info → mct_nightly-2.3.0.20250325.524.dist-info}/top_level.txt +0 -0
@@ -12,326 +12,146 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
-
  import numpy as np
  from pulp import *
- from tqdm import tqdm
- from typing import Dict, Tuple, Any, Optional
+ from typing import Dict, Tuple
 
- from model_compression_toolkit.logger import Logger
- from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
- from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
 
  # Limit ILP solver runtime in seconds
  SOLVER_TIME_LIMIT = 60
 
 
- def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
-                                   target_resource_utilization: ResourceUtilization = None) -> np.ndarray:
-     """
-     Searching and returning a mixed-precision configuration using an ILP optimization solution.
-     It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
-     bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
-     Then, it creates a mapping from each node's index (in the graph) to a dictionary
-     that maps the bitwidth index to the contribution of configuring this node with this
-     bitwidth to the minimal possible resource utilization of the model.
-     Then, and using these mappings, it builds an LP problem and finds an optimal solution.
-     If a solution could not be found, exception is thrown.
-
-     Args:
-         search_manager: MixedPrecisionSearchManager object to be used for problem formalization.
-         target_resource_utilization: Target resource utilization to constrain our LP problem with some resources limitations (like model' weights memory
-         consumption).
-
-     Returns:
-         The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node).
+ class MixedPrecisionIntegerLPSolver:
+     """ Integer Linear Programming solver for Mixed Precision.
 
+     Args:
+         layer_to_sensitivity_mapping: sensitivity per candidate per layer.
+         candidates_ru: resource utilization per candidate.
+         ru_constraints: resource utilization constraints corresponding to 'candidates_ru'.
      """
-
-     # Build a mapping from each layer's index (in the model) to a dictionary that maps the
-     # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
-
-     if target_resource_utilization is None or search_manager is None:
-         Logger.critical("Invalid parameters: 'target_resource_utilization' and 'search_manager' must not be 'None' "
-                         "for mixed-precision search. Ensure valid inputs are provided.")
-
-     layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_resource_utilization)
-
-     # Init variables to find their values when solving the lp problem.
-     layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_metrics_mapping)
-
-     # Add all equations and inequalities that define the problem.
-     lp_problem = _formalize_problem(layer_to_indicator_vars_mapping,
-                                     layer_to_metrics_mapping,
-                                     layer_to_objective_vars_mapping,
-                                     target_resource_utilization,
-                                     search_manager)
-
-     # Use default PULP solver. Limit runtime in seconds
-     solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT)
-     lp_problem.solve(solver=solver)  # Try to solve the problem.
-
-     assert lp_problem.status == LpStatusOptimal, Logger.critical(
-         "No solution was found during solving the LP problem")
-     Logger.info(f"ILP status: {LpStatus[lp_problem.status]}")
-
-     # Take the bitwidth index only if its corresponding indicator is one.
-     config = np.asarray(
-         [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for
-          nbits_to_indicator
-          in layer_to_indicator_vars_mapping.values()]
-     ).flatten()
-
-     if target_resource_utilization.bops_restricted():
-         return search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
-     else:
-         return config
-
-
- def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
-         Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]:
-     """
-     Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use,
-     and a variable for each indicator for whether we use the former variable or not.
-
-     Args:
-         layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the
-         bitwidth index to the observed sensitivity of the model.
-
-     Returns:
-         A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem,
-         and the second for indicators for each variable.
-     """
-
-     layer_to_indicator_vars_mapping = dict()
-     layer_to_objective_vars_mapping = dict()
-
-     for layer, nbits_to_metric in layer_to_metrics_mapping.items():
-         layer_to_indicator_vars_mapping[layer] = dict()
-
-         for nbits in nbits_to_metric.keys():
-             layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}",
-                                                                        lowBound=0,
-                                                                        upBound=1,
-                                                                        cat=LpInteger)
-
-         layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0)
-
-     return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping
-
-
- def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVariable]],
-                        layer_to_metrics_mapping: Dict[int, Dict[int, float]],
-                        layer_to_objective_vars_mapping: Dict[int, LpVariable],
-                        target_resource_utilization: ResourceUtilization,
-                        search_manager: MixedPrecisionSearchManager) -> LpProblem:
-     """
-     Formalize the LP problem by defining all inequalities that define the solution space.
-
-     Args:
-         layer_to_indicator_vars_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to
-         indicator variable.
-         layer_to_metrics_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to sensitivity
-         evaluation.
-         layer_to_objective_vars_mapping: Dictionary that maps each node's index to a bitwidth variable we find its
-         value.
-         target_resource_utilization: Target resource utilization to reduce our feasible solution space.
-         search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-
-     Returns:
-         The formalized LP problem.
-     """
-
-     lp_problem = LpProblem()  # minimization problem by default
-     lp_problem += lpSum([layer_to_objective_vars_mapping[layer] for layer in
-                          layer_to_metrics_mapping.keys()])  # Objective (minimize acc loss)
-
-     for layer in layer_to_metrics_mapping.keys():
-         # Use every bitwidth for every layer with its indicator.
-         lp_problem += lpSum([indicator * layer_to_metrics_mapping[layer][nbits]
-                              for nbits, indicator in layer_to_indicator_vars_mapping[layer].items()]) == \
-                       layer_to_objective_vars_mapping[layer]
-
-         # Constraint of only one indicator==1
-         lp_problem += lpSum(
-             [v for v in layer_to_indicator_vars_mapping[layer].values()]) == 1
-
-     # Bound the feasible solution space with the desired resource utilization values.
-     # Creates separate constraints for weights utilization and activation utilization.
-     if target_resource_utilization is not None:
+     def __init__(self, layer_to_sensitivity_mapping: Dict[int, Dict[int, float]],
+                  candidates_ru: Dict[RUTarget, np.ndarray],
+                  ru_constraints: Dict[RUTarget, np.ndarray]):
+         self.layer_to_sensitivity_mapping = layer_to_sensitivity_mapping
+         self.candidates_ru = candidates_ru
+         self.ru_constraints = ru_constraints
+
+         self.layer_to_indicator_vars_mapping, self.layer_to_objective_vars_mapping = (
+             self._init_problem_vars(layer_to_sensitivity_mapping))
+
+     def run(self) -> List[int]:
+         """
+         Build and solve an ILP optimization problem.
+
+         Returns:
+             The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node).
+
+         """
+         # Add all equations and inequalities that define the problem.
+         lp_problem = self._formalize_problem()
+
+         # Use default PULP solver. Limit runtime in seconds
+         solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT)
+         lp_problem.solve(solver=solver)  # Try to solve the problem.
+
+         if lp_problem.status != LpStatusOptimal:
+             raise RuntimeError(f'No solution was found for the LP problem, with status {lp_problem.status}')
+
+         # Take the bitwidth index only if its corresponding indicator is one.
+         config = np.asarray(
+             [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for
+              nbits_to_indicator
+              in self.layer_to_indicator_vars_mapping.values()]
+         ).flatten()
+
+         return config.tolist()
+
+     @staticmethod
+     def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
+             Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]:
+         """
+         Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use,
+         and a variable for each indicator for whether we use the former variable or not.
+
+         Args:
+             layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the
+             bitwidth index to the observed sensitivity of the model.
+
+         Returns:
+             A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem,
+             and the second for indicators for each variable.
+         """
+
+         layer_to_indicator_vars_mapping = dict()
+         layer_to_objective_vars_mapping = dict()
+
+         for layer, nbits_to_metric in layer_to_metrics_mapping.items():
+             layer_to_indicator_vars_mapping[layer] = dict()
+
+             for nbits in nbits_to_metric.keys():
+                 layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}",
+                                                                            lowBound=0,
+                                                                            upBound=1,
+                                                                            cat=LpInteger)
+
+             layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0)
+
+         return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping
+
+     def _formalize_problem(self) -> LpProblem:
+         """
+         Formalize the LP problem by defining all inequalities that define the solution space.
+
+         Returns:
+             The formalized LP problem.
+         """
+
+         lp_problem = LpProblem()  # minimization problem by default
+         lp_problem += lpSum([self.layer_to_objective_vars_mapping[layer] for layer in
+                              self.layer_to_sensitivity_mapping.keys()])  # Objective (minimize acc loss)
+
+         for layer in self.layer_to_sensitivity_mapping.keys():
+             # Use every bitwidth for every layer with its indicator.
+             lp_problem += lpSum([indicator * self.layer_to_sensitivity_mapping[layer][nbits]
+                                  for nbits, indicator in self.layer_to_indicator_vars_mapping[layer].items()]) == \
+                           self.layer_to_objective_vars_mapping[layer]
+
+             # Constraint of only one indicator==1
+             lp_problem += lpSum(
+                 [v for v in self.layer_to_indicator_vars_mapping[layer].values()]) == 1
+
+         # Bound the feasible solution space with the desired resource utilization values.
+         self._add_ru_constraints(lp_problem=lp_problem)
+
+         return lp_problem
+
+     def _add_ru_constraints(self, lp_problem: LpProblem):
+         """
+         Adding targets constraints for the Lp problem for the given target resource utilization.
+         The update to the Lp problem object is done inplace.
+
+         Args:
+             lp_problem: An Lp problem object to add constraint to.
+         """
          indicators = []
-         for layer in layer_to_metrics_mapping.keys():
-             for _, indicator in layer_to_indicator_vars_mapping[layer].items():
-                 indicators.append(indicator)
-
-         indicators_arr = np.array(indicators)
-         indicators_matrix = np.diag(indicators_arr)
-
-         _add_ru_constraints(search_manager=search_manager,
-                             target_resource_utilization=target_resource_utilization,
-                             indicators_matrix=indicators_matrix,
-                             lp_problem=lp_problem,
-                             non_conf_ru_dict=search_manager.non_conf_ru_dict)
-     else:  # pragma: no cover
-         Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
-                         "A valid 'target_resource_utilization' is required.")
-     return lp_problem
-
-
- def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
-                         target_resource_utilization: ResourceUtilization,
-                         indicators_matrix: np.ndarray,
-                         lp_problem: LpProblem,
-                         non_conf_ru_dict: Dict[RUTarget, np.ndarray]):
-     """
-     Adding targets constraints for the Lp problem for the given target resource utilization.
-     The update to the Lp problem object is done inplace.
-
-     Args:
-         search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-         target_resource_utilization: Target resource utilization.
-         indicators_matrix: A diagonal matrix of the Lp problem's indicators.
-         lp_problem: An Lp problem object to add constraint to.
-         non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets.
-     """
-     ru_indicated_vectors = {}
-     # targets to add constraints for
-     constraints_targets = target_resource_utilization.get_restricted_targets()
-     # to add constraints for Total target we need to compute weight and activation
-     targets_to_compute = constraints_targets
-     if RUTarget.TOTAL in constraints_targets:
-         targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
-
-     for target in targets_to_compute:
-         ru_matrix = search_manager.compute_resource_utilization_matrix(target)  # num elements X num configurations
-         indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)  # num elements X num configurations
-
-         # Sum the indicated values over all configurations, and add the value for minimal configuration once.
-         # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory
-         # that would be required if that configuration is selected).
-         # Each element in a vector is an lp object representing the configurations sum term for a memory element.
-         ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
-
-         non_conf_ru_vec = non_conf_ru_dict[target]
-         if non_conf_ru_vec is not None and non_conf_ru_vec.size:
-             # add non-conf value as additional mem elements so that they get aggregated
-             ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
-         ru_indicated_vectors[target] = ru_vec
-
-     # Add constraints only for the restricted targets in target resource utilization.
-     # Adding activation constraints modifies the lp term in ru_indicated_vectors, so if both activation and total
-     # are restricted we first add the constraints for total.
-     if RUTarget.TOTAL in constraints_targets and RUTarget.ACTIVATION in constraints_targets:
-         constraints_targets.remove(RUTarget.ACTIVATION)
-         constraints_targets = list(constraints_targets) + [RUTarget.ACTIVATION]
-     for target in constraints_targets:
-         target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target]
-         aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
-         for v in aggr_ru:
-             if isinstance(v, float):
-                 if v > target_resource_utilization_value:
-                     Logger.critical(
-                         f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
-                         f"with the value {target_resource_utilization_value}.")  # pragma: no cover
-             else:
-                 lp_problem += v <= target_resource_utilization_value
-
-
- def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
-     """
-     Aggregate resource utilization values for the LP.
-
-     Args:
-         targets_ru_vec: resource utilization vectors for all precomputed targets.
-         target: resource utilization target.
-
-     Returns:
-         Aggregated resource utilization.
-     """
-     if target == RUTarget.TOTAL:
-         w = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
-         act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION]
-         return [w + v for v in act_ru_vec]
-
-     if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
-         return [lpSum(targets_ru_vec[target])]
-
-     if target == RUTarget.ACTIVATION:
-         # for max aggregation, each value constitutes a separate constraint
-         return list(targets_ru_vec[target])
-
-     raise ValueError(f'Unexpected target {target}.')  # pragma: no cover
-
-
- def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
-                                     target_resource_utilization: ResourceUtilization,
-                                     eps: float = EPS) -> Dict[int, Dict[int, float]]:
-     """
-     This function measures the sensitivity of a change in a bitwidth of a layer on the entire model.
-     It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity.
-     For each node and some possible node's bitwidth (according to the given search space), we use
-     the framework function compute_metric_fn in order to infer
-     a batch of images, and compute (using the inference results) the sensitivity metric of
-     the configured mixed-precision model.
-
-     Args:
-         search_manager: MixedPrecisionSearchManager object to be used for problem formalization.
-         target_resource_utilization: ResourceUtilization to constrain our LP problem with some resources limitations
-         (like model' weights memory consumption).
-         eps: Epsilon value to manually increase metric value (if necessary) for numerical stability
-
-     Returns:
-         Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to
-         the sensitivity of the model.
-
-     """
-
-     Logger.info('Starting to evaluate metrics')
-     layer_to_metrics_mapping = {}
-
-     if target_resource_utilization.bops_restricted():
-         origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config)
-         max_config_value = search_manager.compute_metric_fn(origin_max_config)
-     else:
-         max_config_value = search_manager.compute_metric_fn(search_manager.max_ru_config)
-
-     for node_idx, layer_possible_bitwidths_indices in tqdm(search_manager.layer_to_bitwidth_mapping.items(),
-                                                            total=len(search_manager.layer_to_bitwidth_mapping)):
-         layer_to_metrics_mapping[node_idx] = {}
-
-         for bitwidth_idx in layer_possible_bitwidths_indices:
-             if search_manager.max_ru_config[node_idx] == bitwidth_idx:
-                 # This is a computation of the metric for the max configuration, assign pre-calculated value
-                 layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value
-                 continue
-
-             # Create a configuration that differs at one layer only from the baseline model
-             mp_model_configuration = search_manager.max_ru_config.copy()
-             mp_model_configuration[node_idx] = bitwidth_idx
-
-             # Build a distance matrix using the function we got from the framework implementation.
-             if target_resource_utilization.bops_restricted():
-                 # Reconstructing original graph's configuration from virtual graph's configuration
-                 origin_mp_model_configuration = \
-                     search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
-                         mp_model_configuration,
-                         changed_virtual_nodes_idx=[node_idx],
-                         original_base_config=origin_max_config)
-                 origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if
-                                                 c != origin_mp_model_configuration[i]]
-                 metric_value = search_manager.compute_metric_fn(
-                     origin_mp_model_configuration,
-                     origin_changed_nodes_indices,
-                     origin_max_config)
-             else:
-                 metric_value = search_manager.compute_metric_fn(
-                     mp_model_configuration,
-                     [node_idx],
-                     search_manager.max_ru_config)
-
-             layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps)
-
-     # Finalize distance metric mapping
-     search_manager.finalize_distance_metric(layer_to_metrics_mapping)
-
-     return layer_to_metrics_mapping
+         for layer in self.layer_to_sensitivity_mapping:
+             indicators.extend(list(self.layer_to_indicator_vars_mapping[layer].values()))
+         indicators_vec = np.array(indicators)
+
+         for target, ru_matrix in self.candidates_ru.items():
+             # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall
+             # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration)
+             assert ru_matrix.ndim == 2
+             if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
+                 assert ru_matrix.shape[1] == 1
+
+             indicated_ru_matrix = ru_matrix.T * indicators_vec
+             # build lp sum term over all candidates
+             ru_vec = indicated_ru_matrix.sum(axis=1)
+
+             # For cumulative metrics a single constraint is added, for max metrics a separate constraint
+             # is added for each memory element (each element < target => max element < target).
+             assert len(ru_vec) == len(self.ru_constraints[target])
+             for v, c in zip(ru_vec, self.ru_constraints[target]):
+                 lp_problem += v <= c
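
The hunk above replaces the free function `mp_integer_programming_search`, which pulled sensitivities and resource-utilization (RU) matrices out of a `MixedPrecisionSearchManager`, with a self-contained `MixedPrecisionIntegerLPSolver` that receives everything precomputed. A minimal usage sketch based on the constructor and `run()` shown in this diff; the import paths follow the file list above, while the two-layer problem and all its numbers are purely illustrative:

```python
import numpy as np

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
    RUTarget
from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
    MixedPrecisionIntegerLPSolver

# Two layers with two bitwidth candidates each; lower sensitivity is better.
layer_to_sensitivity = {0: {0: 0.10, 1: 0.02},
                        1: {0: 0.08, 1: 0.01}}

# Weights is a cumulative target, so its RU matrix has shape (num candidates, 1).
# Rows follow the layer-major order in which the solver creates its indicators:
# (layer 0, cand 0), (layer 0, cand 1), (layer 1, cand 0), (layer 1, cand 1).
candidates_ru = {RUTarget.WEIGHTS: np.array([[1.0], [2.0], [1.5], [3.0]])}

# One bound per column of the corresponding RU matrix.
ru_constraints = {RUTarget.WEIGHTS: np.array([4.0])}

solver = MixedPrecisionIntegerLPSolver(layer_to_sensitivity, candidates_ru, ru_constraints)
print(solver.run())  # e.g. [1, 0]: lowest total sensitivity with weights RU <= 4.0
```

Since each RU matrix row corresponds to one indicator variable, the single weights constraint here bounds the lpSum of the selected candidates' utilization, while the sensitivity evaluation that used to live in `_build_layer_to_metrics_mapping` is now the caller's responsibility.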
@@ -63,7 +63,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
              # layer has max config in the given solution, nothing to optimize
              continue
 
-         current_node = search_manager.graph.get_configurable_sorted_nodes(search_manager.fw_info)[node_idx]
+         current_node = search_manager.mp_topo_configurable_nodes[node_idx]
          node_candidates = current_node.candidates_quantization_cfg
 
          # only weights kernel attribute is quantized with weights mixed precision
@@ -220,6 +220,8 @@ def nodes_builder(model: GraphModule,
              node_type = getattr(torch, node.target)
          elif hasattr(torch.Tensor, node.target):
              node_type = getattr(torch.Tensor, node.target)
+             if node_type==torch.Tensor.to:
+                 Logger.critical(f"The call method \"to\" is not supported. Please consider moving \"torch.Tensor.to\" operations to init code.")  # pragma: no cover
          else:
              Logger.critical(f"The call method '{node.target}' in {node} is not supported.")  # pragma: no cover
 
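The added `graph_builders.py` check rejects `torch.Tensor.to` calls encountered while building nodes from a traced PyTorch model, directing users to move them into init code. A hypothetical illustration of the rejected pattern and the rewrite the error message suggests (module names and the buffer are invented for the example):

```python
import torch

class Rejected(torch.nn.Module):
    def forward(self, x):
        # `.to` traced inside forward becomes a call_method node -> now rejected by MCT's reader
        return x + torch.ones(1).to(x.device)

class Supported(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # Created once in init; a registered buffer follows the module across device moves
        self.register_buffer('offset', torch.ones(1))

    def forward(self, x):
        return x + self.offset
```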
@@ -32,8 +32,6 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
      ResourceUtilization
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
      ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
- from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import \
-     requires_mixed_precision
  from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
  from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
  from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \
@@ -95,16 +93,8 @@ def core_runner(in_model: Any,
                              "enabled, but the provided MixedPrecisionQuantizationConfig is None.")
          if target_resource_utilization.activation_restricted() or target_resource_utilization.total_mem_restricted():
              Logger.warning("Using an experimental feature max-cut for activation memory utilization estimation.")
-         # Determine whether to use mixed precision or single precision based on target_resource_utilization.
-         if requires_mixed_precision(in_model,
-                                     target_resource_utilization,
-                                     representative_data_gen,
-                                     core_config,
-                                     fqc,
-                                     fw_info,
-                                     fw_impl):
-             core_config.mixed_precision_config.set_mixed_precision_enable()
-             Logger.info('Mixed precision enabled.')
+         core_config.mixed_precision_config.set_mixed_precision_enable()
+         Logger.info('Mixed precision enabled.')
 
      graph = graph_preparation_runner(in_model,
                                       representative_data_gen,