mct-nightly 2.3.0.20250322.517__py3-none-any.whl → 2.3.0.20250324.606__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/METADATA +1 -1
- {mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/RECORD +16 -16
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/base_graph.py +14 -4
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +32 -96
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +17 -42
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +179 -60
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +22 -10
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +1 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +14 -94
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +132 -312
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +1 -1
- model_compression_toolkit/core/runner.py +2 -12
- {mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/WHEEL +0 -0
- {mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py CHANGED
@@ -12,326 +12,146 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 import numpy as np
 from pulp import *
-from
-from typing import Dict, Tuple, Any, Optional
+from typing import Dict, Tuple
 
-from model_compression_toolkit.
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
 
 # Limit ILP solver runtime in seconds
 SOLVER_TIME_LIMIT = 60
 
 
-
-
-    """
-    Searching and returning a mixed-precision configuration using an ILP optimization solution.
-    It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
-    bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
-    Then, it creates a mapping from each node's index (in the graph) to a dictionary
-    that maps the bitwidth index to the contribution of configuring this node with this
-    bitwidth to the minimal possible resource utilization of the model.
-    Then, and using these mappings, it builds an LP problem and finds an optimal solution.
-    If a solution could not be found, exception is thrown.
-
-    Args:
-        search_manager: MixedPrecisionSearchManager object to be used for problem formalization.
-        target_resource_utilization: Target resource utilization to constrain our LP problem with some resources limitations (like model' weights memory
-        consumption).
-
-    Returns:
-        The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node).
+class MixedPrecisionIntegerLPSolver:
+    """ Integer Linear Programming solver for Mixed Precision.
 
+    Args:
+        layer_to_sensitivity_mapping: sensitivity per candidate per layer.
+        candidates_ru: resource utilization per candidate.
+        ru_constraints: resource utilization constraints corresponding to 'candidates_ru'.
     """
... (old lines 50-154 removed; their content was not captured in this diff view)
-        # Constraint of only one indicator==1
-        lp_problem += lpSum(
-            [v for v in layer_to_indicator_vars_mapping[layer].values()]) == 1
-
-    # Bound the feasible solution space with the desired resource utilization values.
-    # Creates separate constraints for weights utilization and activation utilization.
-    if target_resource_utilization is not None:
+    def __init__(self, layer_to_sensitivity_mapping: Dict[int, Dict[int, float]],
+                 candidates_ru: Dict[RUTarget, np.ndarray],
+                 ru_constraints: Dict[RUTarget, np.ndarray]):
+        self.layer_to_sensitivity_mapping = layer_to_sensitivity_mapping
+        self.candidates_ru = candidates_ru
+        self.ru_constraints = ru_constraints
+
+        self.layer_to_indicator_vars_mapping, self.layer_to_objective_vars_mapping = (
+            self._init_problem_vars(layer_to_sensitivity_mapping))
+
+    def run(self) -> List[int]:
+        """
+        Build and solve an ILP optimization problem.
+
+        Returns:
+            The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node).
+
+        """
+        # Add all equations and inequalities that define the problem.
+        lp_problem = self._formalize_problem()
+
+        # Use default PULP solver. Limit runtime in seconds
+        solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT)
+        lp_problem.solve(solver=solver)  # Try to solve the problem.
+
+        if lp_problem.status != LpStatusOptimal:
+            raise RuntimeError(f'No solution was found for the LP problem, with status {lp_problem.status}')
+
+        # Take the bitwidth index only if its corresponding indicator is one.
+        config = np.asarray(
+            [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for
+             nbits_to_indicator
+             in self.layer_to_indicator_vars_mapping.values()]
+        ).flatten()
+
+        return config.tolist()
+
+    @staticmethod
+    def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
+            Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]:
+        """
+        Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use,
+        and a variable for each indicator for whether we use the former variable or not.
+
+        Args:
+            layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the
+            bitwidth index to the observed sensitivity of the model.
+
+        Returns:
+            A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem,
+            and the second for indicators for each variable.
+        """
+
+        layer_to_indicator_vars_mapping = dict()
+        layer_to_objective_vars_mapping = dict()
+
+        for layer, nbits_to_metric in layer_to_metrics_mapping.items():
+            layer_to_indicator_vars_mapping[layer] = dict()
+
+            for nbits in nbits_to_metric.keys():
+                layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}",
+                                                                           lowBound=0,
+                                                                           upBound=1,
+                                                                           cat=LpInteger)
+
+            layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0)
+
+        return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping
+
+    def _formalize_problem(self) -> LpProblem:
+        """
+        Formalize the LP problem by defining all inequalities that define the solution space.
+
+        Returns:
+            The formalized LP problem.
+        """
+
+        lp_problem = LpProblem()  # minimization problem by default
+        lp_problem += lpSum([self.layer_to_objective_vars_mapping[layer] for layer in
+                             self.layer_to_sensitivity_mapping.keys()])  # Objective (minimize acc loss)
+
+        for layer in self.layer_to_sensitivity_mapping.keys():
+            # Use every bitwidth for every layer with its indicator.
+            lp_problem += lpSum([indicator * self.layer_to_sensitivity_mapping[layer][nbits]
+                                 for nbits, indicator in self.layer_to_indicator_vars_mapping[layer].items()]) == \
+                          self.layer_to_objective_vars_mapping[layer]
+
+            # Constraint of only one indicator==1
+            lp_problem += lpSum(
+                [v for v in self.layer_to_indicator_vars_mapping[layer].values()]) == 1
+
+        # Bound the feasible solution space with the desired resource utilization values.
+        self._add_ru_constraints(lp_problem=lp_problem)
+
+        return lp_problem
+
+    def _add_ru_constraints(self, lp_problem: LpProblem):
+        """
+        Adding targets constraints for the Lp problem for the given target resource utilization.
+        The update to the Lp problem object is done inplace.
+
+        Args:
+            lp_problem: An Lp problem object to add constraint to.
+        """
         indicators = []
-        for layer in
... (old lines 164-182 removed; their content was not captured in this diff view)
-                                   indicators_matrix: np.ndarray,
-                                   lp_problem: LpProblem,
-                                   non_conf_ru_dict: Dict[RUTarget, np.ndarray]):
-    """
-    Adding targets constraints for the Lp problem for the given target resource utilization.
-    The update to the Lp problem object is done inplace.
-
-    Args:
-        search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-        target_resource_utilization: Target resource utilization.
-        indicators_matrix: A diagonal matrix of the Lp problem's indicators.
-        lp_problem: An Lp problem object to add constraint to.
-        non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets.
-    """
-    ru_indicated_vectors = {}
-    # targets to add constraints for
-    constraints_targets = target_resource_utilization.get_restricted_targets()
-    # to add constraints for Total target we need to compute weight and activation
-    targets_to_compute = constraints_targets
-    if RUTarget.TOTAL in constraints_targets:
-        targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
-
-    for target in targets_to_compute:
-        ru_matrix = search_manager.compute_resource_utilization_matrix(target)  # num elements X num configurations
-        indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)  # num elements X num configurations
-
-        # Sum the indicated values over all configurations, and add the value for minimal configuration once.
-        # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory
-        # that would be required if that configuration is selected).
-        # Each element in a vector is an lp object representing the configurations sum term for a memory element.
-        ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
-
-        non_conf_ru_vec = non_conf_ru_dict[target]
-        if non_conf_ru_vec is not None and non_conf_ru_vec.size:
-            # add non-conf value as additional mem elements so that they get aggregated
-            ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
-        ru_indicated_vectors[target] = ru_vec
-
-    # Add constraints only for the restricted targets in target resource utilization.
-    # Adding activation constraints modifies the lp term in ru_indicated_vectors, so if both activation and total
-    # are restricted we first add the constraints for total.
-    if RUTarget.TOTAL in constraints_targets and RUTarget.ACTIVATION in constraints_targets:
-        constraints_targets.remove(RUTarget.ACTIVATION)
-        constraints_targets = list(constraints_targets) + [RUTarget.ACTIVATION]
-    for target in constraints_targets:
-        target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target]
-        aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
-        for v in aggr_ru:
-            if isinstance(v, float):
-                if v > target_resource_utilization_value:
-                    Logger.critical(
-                        f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
-                        f"with the value {target_resource_utilization_value}.")  # pragma: no cover
-            else:
-                lp_problem += v <= target_resource_utilization_value
-
-
-def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
-    """
-    Aggregate resource utilization values for the LP.
-
-    Args:
-        targets_ru_vec: resource utilization vectors for all precomputed targets.
-        target: resource utilization target.
-
-    Returns:
-        Aggregated resource utilization.
-    """
-    if target == RUTarget.TOTAL:
-        w = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
-        act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION]
-        return [w + v for v in act_ru_vec]
-
-    if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
-        return [lpSum(targets_ru_vec[target])]
-
-    if target == RUTarget.ACTIVATION:
-        # for max aggregation, each value constitutes a separate constraint
-        return list(targets_ru_vec[target])
-
-    raise ValueError(f'Unexpected target {target}.')  # pragma: no cover
-
-
-def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
-                                    target_resource_utilization: ResourceUtilization,
-                                    eps: float = EPS) -> Dict[int, Dict[int, float]]:
-    """
-    This function measures the sensitivity of a change in a bitwidth of a layer on the entire model.
-    It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity.
-    For each node and some possible node's bitwidth (according to the given search space), we use
-    the framework function compute_metric_fn in order to infer
-    a batch of images, and compute (using the inference results) the sensitivity metric of
-    the configured mixed-precision model.
-
-    Args:
-        search_manager: MixedPrecisionSearchManager object to be used for problem formalization.
-        target_resource_utilization: ResourceUtilization to constrain our LP problem with some resources limitations
-        (like model' weights memory consumption).
-        eps: Epsilon value to manually increase metric value (if necessary) for numerical stability
-
-    Returns:
-        Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to
-        the sensitivity of the model.
-
-    """
-
-    Logger.info('Starting to evaluate metrics')
-    layer_to_metrics_mapping = {}
-
-    if target_resource_utilization.bops_restricted():
-        origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config)
-        max_config_value = search_manager.compute_metric_fn(origin_max_config)
-    else:
-        max_config_value = search_manager.compute_metric_fn(search_manager.max_ru_config)
-
-    for node_idx, layer_possible_bitwidths_indices in tqdm(search_manager.layer_to_bitwidth_mapping.items(),
-                                                           total=len(search_manager.layer_to_bitwidth_mapping)):
-        layer_to_metrics_mapping[node_idx] = {}
-
-        for bitwidth_idx in layer_possible_bitwidths_indices:
-            if search_manager.max_ru_config[node_idx] == bitwidth_idx:
-                # This is a computation of the metric for the max configuration, assign pre-calculated value
-                layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value
-                continue
-
-            # Create a configuration that differs at one layer only from the baseline model
-            mp_model_configuration = search_manager.max_ru_config.copy()
-            mp_model_configuration[node_idx] = bitwidth_idx
-
-            # Build a distance matrix using the function we got from the framework implementation.
-            if target_resource_utilization.bops_restricted():
-                # Reconstructing original graph's configuration from virtual graph's configuration
-                origin_mp_model_configuration = \
-                    search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
-                        mp_model_configuration,
-                        changed_virtual_nodes_idx=[node_idx],
-                        original_base_config=origin_max_config)
-                origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if
-                                                c != origin_mp_model_configuration[i]]
-                metric_value = search_manager.compute_metric_fn(
-                    origin_mp_model_configuration,
-                    origin_changed_nodes_indices,
-                    origin_max_config)
-            else:
-                metric_value = search_manager.compute_metric_fn(
-                    mp_model_configuration,
-                    [node_idx],
-                    search_manager.max_ru_config)
-
-            layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps)
-
-    # Finalize distance metric mapping
-    search_manager.finalize_distance_metric(layer_to_metrics_mapping)
-
-    return layer_to_metrics_mapping
+        for layer in self.layer_to_sensitivity_mapping:
+            indicators.extend(list(self.layer_to_indicator_vars_mapping[layer].values()))
+        indicators_vec = np.array(indicators)
+
+        for target, ru_matrix in self.candidates_ru.items():
+            # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall
+            # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration)
+            assert ru_matrix.ndim == 2
+            if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
+                assert ru_matrix.shape[1] == 1
+
+            indicated_ru_matrix = ru_matrix.T * indicators_vec
+            # build lp sum term over all candidates
+            ru_vec = indicated_ru_matrix.sum(axis=1)
+
+            # For cumulative metrics a single constraint is added, for max metrics a separate constraint
+            # is added for each memory element (each element < target => max element < target).
+            assert len(ru_vec) == len(self.ru_constraints[target])
+            for v, c in zip(ru_vec, self.ru_constraints[target]):
+                lp_problem += v <= c
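For context, the sketch below drives the new MixedPrecisionIntegerLPSolver introduced in this hunk directly. The import paths follow the file locations listed in this diff, but the two-layer sensitivity values and weight-utilization numbers are invented for illustration; in MCT these inputs are produced by the mixed-precision search manager rather than written by hand.

# Illustrative toy example: two layers with two bitwidth candidates each (hypothetical numbers).
import numpy as np
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
    MixedPrecisionIntegerLPSolver

# Sensitivity of the model per layer index and bitwidth-candidate index (lower is better).
layer_to_sensitivity = {0: {0: 0.5, 1: 0.1},
                        1: {0: 0.3, 1: 0.05}}
# One row per (layer, candidate) pair, flattened in the same order as the mapping above.
# Cumulative targets such as WEIGHTS use a single column holding the overall utilization.
candidates_ru = {RUTarget.WEIGHTS: np.array([[10.], [20.], [8.], [16.]])}
# One bound per column of the corresponding utilization matrix.
ru_constraints = {RUTarget.WEIGHTS: np.array([30.])}

solver = MixedPrecisionIntegerLPSolver(layer_to_sensitivity, candidates_ru, ru_constraints)
config = solver.run()
# With these numbers the minimal-sensitivity feasible choice is [1, 0]:
# candidates (1, 0) cost 20 + 8 = 28 <= 30, while (1, 1) would need 36.
print(config)

Compared with the removed module-level search function, the solver now takes plain dictionaries and arrays instead of a MixedPrecisionSearchManager and a target ResourceUtilization, which is what makes a standalone call like the one above possible.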
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py CHANGED
@@ -63,7 +63,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
             # layer has max config in the given solution, nothing to optimize
             continue
 
-        current_node = search_manager.
+        current_node = search_manager.mp_topo_configurable_nodes[node_idx]
         node_candidates = current_node.candidates_quantization_cfg
 
         # only weights kernel attribute is quantized with weights mixed precision
model_compression_toolkit/core/runner.py CHANGED
@@ -32,8 +32,6 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
     ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
     ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import \
-    requires_mixed_precision
 from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \
@@ -95,16 +93,8 @@ def core_runner(in_model: Any,
                             "enabled, but the provided MixedPrecisionQuantizationConfig is None.")
         if target_resource_utilization.activation_restricted() or target_resource_utilization.total_mem_restricted():
             Logger.warning("Using an experimental feature max-cut for activation memory utilization estimation.")
-
... (old line 99 removed; its content was not captured in this diff view)
-                                      target_resource_utilization,
-                                      representative_data_gen,
-                                      core_config,
-                                      fqc,
-                                      fw_info,
-                                      fw_impl):
-            core_config.mixed_precision_config.set_mixed_precision_enable()
-            Logger.info('Mixed precision enabled.')
+        core_config.mixed_precision_config.set_mixed_precision_enable()
+        Logger.info('Mixed precision enabled.')
 
         graph = graph_preparation_runner(in_model,
                                          representative_data_gen,
{mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/WHEEL RENAMED
File without changes
{mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/licenses/LICENSE.md RENAMED
File without changes
{mct_nightly-2.3.0.20250322.517.dist-info → mct_nightly-2.3.0.20250324.606.dist-info}/top_level.txt RENAMED
File without changes