mct-nightly 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.161150__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/RECORD +13 -13
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/{resource_utilization_tools/ru_methods.py → mixed_precision_ru_helper.py} +35 -70
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +20 -38
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +23 -7
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +48 -39
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +59 -59
- model_compression_toolkit/core/runner.py +18 -13
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: mct-nightly
|
3
|
-
Version: 2.2.0.20250114.
|
3
|
+
Version: 2.2.0.20250114.161150
|
4
4
|
Summary: A Model Compression Toolkit for neural networks
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: Apache Software License
|
{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
model_compression_toolkit/__init__.py,sha256=xA9z6KdE1v3DTO3wLGBTWy4O0yAMvWUgDyFZsuzNP78,1557
|
2
2
|
model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
|
3
3
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
4
4
|
model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
|
@@ -8,7 +8,7 @@ model_compression_toolkit/core/__init__.py,sha256=8a0wUNBKwTdJGDk_Ho6WQAXjGuCqQZ
|
|
8
8
|
model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
|
9
9
|
model_compression_toolkit/core/graph_prep_runner.py,sha256=CVTjBaci8F6EP3IKDnRMfxkP-Sv8qY8GpkGt6FyII2U,11376
|
10
10
|
model_compression_toolkit/core/quantization_prep_runner.py,sha256=OtL6g2rTC5mfdKrkzm47EPPW-voGGVYMYxpy2_sfu1U,6547
|
11
|
-
model_compression_toolkit/core/runner.py,sha256=
|
11
|
+
model_compression_toolkit/core/runner.py,sha256=jcKvWUhS5zQBBptONKvRFnkWrdDENcS5Go7kPi8Ya4o,13583
|
12
12
|
model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
|
13
13
|
model_compression_toolkit/core/common/base_substitutions.py,sha256=xDFSmVVs_iFSZfajytI0cuQaNRNcwHX3uqOoHgVUvxQ,1666
|
14
14
|
model_compression_toolkit/core/common/framework_implementation.py,sha256=IkMydCj6voau7dwkYLYA_Ka_EFUKP3GKQdpYN6b1fgc,22163
|
@@ -65,18 +65,18 @@ model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_uti
|
|
65
65
|
model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
|
66
66
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=AkKBP5Dm7iwz7qs5WKDB7Bm8Os-jXaMVnlkyrlw4iRY,4603
|
67
67
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
|
68
|
+
model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=7iJ2YprFvm2Dk9EkXYrwO7-Sf89f537D-KrQP7XhvPs,8889
|
68
69
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=aAuGIzSDKIDiq07nheeWRXLEatzr6Fvoa5ZHv-2BtCI,7130
|
69
|
-
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=
|
70
|
+
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=fe8R1ZdllwL_YQoHgvzTBjoI3GJo4bEVk89I3zEVr14,32463
|
70
71
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=gsigifJ-ykWNafF4t7UMEC_-nd6YPERAk1_z0kT-Y88,27172
|
71
72
|
model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
|
72
73
|
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=UWgxzhKWFOoESLq0TFVz0M1PhkU9d9n6wccSA3RgUxk,7903
|
73
74
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
74
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=
|
75
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=
|
76
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=
|
77
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=6LT3rZo9SlDupO-P22oG7f4sAgF_i1IYz5JxQQuMElU,10841
|
75
|
+
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=T5yVr7lay-6QLuTDBZNI1Ufj02EMBWuY_yHjC8eHx5I,3998
|
76
|
+
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=qkYrYORLL5wmdmCkEY3tDSgabsGYt3OaTDVsgHWYBfE,34885
|
77
|
+
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=tfcbMppa5KP_brfkFWRiOX9LQVHGXJtlgxyAt9oDGuw,8529
|
78
78
|
model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
|
79
|
-
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=
|
79
|
+
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=ZvLxFIfMUPAyKKzPhJcuZyjjngLD9_1wWFU8e14vEbA,17176
|
80
80
|
model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
|
81
81
|
model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
|
82
82
|
model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
|
@@ -523,8 +523,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
|
|
523
523
|
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
|
524
524
|
model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
|
525
525
|
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
|
526
|
-
mct_nightly-2.2.0.20250114.
|
527
|
-
mct_nightly-2.2.0.20250114.
|
528
|
-
mct_nightly-2.2.0.20250114.
|
529
|
-
mct_nightly-2.2.0.20250114.
|
530
|
-
mct_nightly-2.2.0.20250114.
|
526
|
+
mct_nightly-2.2.0.20250114.161150.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
527
|
+
mct_nightly-2.2.0.20250114.161150.dist-info/METADATA,sha256=AjCYuWPjOdisumA9-7iUbEVGvFr1uzWV9809mi42JyQ,26604
|
528
|
+
mct_nightly-2.2.0.20250114.161150.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
529
|
+
mct_nightly-2.2.0.20250114.161150.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
530
|
+
mct_nightly-2.2.0.20250114.161150.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.2.0.20250114.
|
30
|
+
__version__ = "2.2.0.20250114.161150"
|
@@ -12,14 +12,13 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
-
from typing import List, Set, Dict, Optional, Tuple
|
15
|
+
from typing import List, Set, Dict, Optional, Tuple, Any
|
16
16
|
|
17
17
|
import numpy as np
|
18
18
|
|
19
19
|
from model_compression_toolkit.core import FrameworkInfo
|
20
20
|
from model_compression_toolkit.core.common import Graph, BaseNode
|
21
21
|
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
22
|
-
from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
|
23
22
|
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
|
24
23
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
|
25
24
|
RUTarget
|
@@ -44,9 +43,8 @@ class MixedPrecisionRUHelper:
|
|
44
43
|
def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
|
45
44
|
"""
|
46
45
|
Compute utilization of requested targets for a specific configuration in the format expected by LP problem
|
47
|
-
formulation
|
48
|
-
|
49
|
-
consistent between configurations).
|
46
|
+
formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
|
47
|
+
(between calls).
|
50
48
|
|
51
49
|
Args:
|
52
50
|
ru_targets: resource utilization targets to compute.
|
@@ -57,33 +55,26 @@ class MixedPrecisionRUHelper:
|
|
57
55
|
"""
|
58
56
|
|
59
57
|
ru = {}
|
60
|
-
|
61
|
-
act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
|
62
|
-
w_util = None
|
58
|
+
act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
|
63
59
|
if RUTarget.WEIGHTS in ru_targets:
|
64
|
-
|
65
|
-
ru[RUTarget.WEIGHTS] = np.array(list(
|
60
|
+
wu = self._weights_utilization(w_qcs)
|
61
|
+
ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
|
66
62
|
|
67
|
-
# TODO make mp agnostic to activation method
|
68
63
|
if RUTarget.ACTIVATION in ru_targets:
|
69
|
-
|
70
|
-
ru[RUTarget.ACTIVATION] = np.array(list(
|
71
|
-
|
72
|
-
# TODO use maxcut
|
73
|
-
if RUTarget.TOTAL in ru_targets:
|
74
|
-
act_tensors_util = self._activation_tensor_utilization(act_qcs)
|
75
|
-
w_util = w_util or self._weights_utilization(w_qcs)
|
76
|
-
total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
|
77
|
-
# for n in self.graph.nodes if n in act_tensors_util or n in w_util}
|
78
|
-
for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
|
79
|
-
ru[RUTarget.TOTAL] = np.array(list(total.values()))
|
64
|
+
au = self._activation_utilization(act_qcs)
|
65
|
+
ru[RUTarget.ACTIVATION] = np.array(list(au.values()))
|
80
66
|
|
81
67
|
if RUTarget.BOPS in ru_targets:
|
82
68
|
ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
|
83
69
|
|
70
|
+
if RUTarget.TOTAL in ru_targets:
|
71
|
+
raise ValueError('Total target should be computed based on weights and activations targets.')
|
72
|
+
|
73
|
+
assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
|
74
|
+
f'Requested {ru_targets}')
|
84
75
|
return ru
|
85
76
|
|
86
|
-
def
|
77
|
+
def get_quantization_candidates(self, mp_cfg) \
|
87
78
|
-> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
|
88
79
|
"""
|
89
80
|
Retrieve quantization candidates objects for weights and activations from the configuration list.
|
@@ -92,15 +83,13 @@ class MixedPrecisionRUHelper:
|
|
92
83
|
mp_cfg: a list of candidates indices for configurable layers.
|
93
84
|
|
94
85
|
Returns:
|
95
|
-
|
86
|
+
A mapping between nodes to weights quantization config, and a mapping between nodes and activation
|
96
87
|
quantization config.
|
97
88
|
"""
|
98
89
|
mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
|
99
90
|
node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
|
100
|
-
act_qcs = {n: node_qcs
|
101
|
-
|
102
|
-
w_qcs = {n: node_qcs[n].weights_quantization_cfg
|
103
|
-
for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
|
91
|
+
act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
|
92
|
+
w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
|
104
93
|
return act_qcs, w_qcs
|
105
94
|
|
106
95
|
def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
|
@@ -127,8 +116,8 @@ class MixedPrecisionRUHelper:
|
|
127
116
|
nodes_util = {n: u.bytes for n, u in nodes_util.items()}
|
128
117
|
return nodes_util
|
129
118
|
|
130
|
-
def
|
131
|
-
-> Optional[Dict[
|
119
|
+
def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
|
120
|
+
-> Optional[Dict[Any, float]]:
|
132
121
|
"""
|
133
122
|
Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
|
134
123
|
|
@@ -138,41 +127,17 @@ class MixedPrecisionRUHelper:
|
|
138
127
|
Returns:
|
139
128
|
Activation utilization per cut, or empty dict if no configuration was passed.
|
140
129
|
"""
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
|
146
|
-
return cuts_util
|
147
|
-
|
148
|
-
# Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
|
149
|
-
# configurable nodes.
|
150
|
-
return {}
|
151
|
-
|
152
|
-
def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
|
153
|
-
"""
|
154
|
-
Compute activation tensors utilization fo configurable nodes if configuration is passed or
|
155
|
-
for non-configurable nodes otherwise.
|
130
|
+
# Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
|
131
|
+
# covered by the computation of configurable activations.
|
132
|
+
if not act_qcs:
|
133
|
+
return {}
|
156
134
|
|
157
|
-
|
158
|
-
|
135
|
+
_, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut(
|
136
|
+
TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs)
|
137
|
+
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
|
138
|
+
return cuts_util
|
159
139
|
|
160
|
-
|
161
|
-
Activation utilization per node.
|
162
|
-
"""
|
163
|
-
if act_qcs:
|
164
|
-
target_criterion = TargetInclusionCriterion.QConfigurable
|
165
|
-
bitwidth_mode = BitwidthMode.QCustom
|
166
|
-
else:
|
167
|
-
target_criterion = TargetInclusionCriterion.QNonConfigurable
|
168
|
-
bitwidth_mode = BitwidthMode.QDefaultSP
|
169
|
-
|
170
|
-
_, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
|
171
|
-
bitwidth_mode=bitwidth_mode,
|
172
|
-
act_qcs=act_qcs)
|
173
|
-
return {n: u.bytes for n, u in nodes_util.items()}
|
174
|
-
|
175
|
-
def _bops_utilization(self, mp_cfg: List[int]):
|
140
|
+
def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
|
176
141
|
"""
|
177
142
|
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
|
178
143
|
according to the given mixed-precision configuration of a virtual graph with composed nodes.
|
@@ -180,15 +145,15 @@ class MixedPrecisionRUHelper:
|
|
180
145
|
Args:
|
181
146
|
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
|
182
147
|
|
183
|
-
Returns:
|
184
|
-
|
185
|
-
|
148
|
+
Returns:
|
149
|
+
A vector of node's BOPS count.
|
186
150
|
"""
|
187
|
-
#
|
188
|
-
|
189
|
-
|
190
|
-
|
151
|
+
# bops is computed for all nodes, so non-configurable memory is already covered by the computation of
|
152
|
+
# configurable nodes
|
153
|
+
if not mp_cfg:
|
154
|
+
return np.array([])
|
191
155
|
|
156
|
+
# TODO keeping old implementation for now
|
192
157
|
virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
|
193
158
|
|
194
159
|
mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)
|
@@ -26,8 +26,8 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
|
|
26
26
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
|
27
27
|
RUTarget, ResourceUtilization
|
28
28
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
|
29
|
-
|
30
|
-
from model_compression_toolkit.core.common.mixed_precision.
|
29
|
+
TargetInclusionCriterion, BitwidthMode
|
30
|
+
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
|
31
31
|
MixedPrecisionRUHelper
|
32
32
|
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
|
33
33
|
from model_compression_toolkit.logger import Logger
|
@@ -67,13 +67,19 @@ class MixedPrecisionSearchManager:
|
|
67
67
|
self.compute_metric_fn = self.get_sensitivity_metric()
|
68
68
|
self._cuts = None
|
69
69
|
|
70
|
-
|
70
|
+
# To define RU Total constraints we need to compute weights and activations even if they have no constraints
|
71
|
+
# TODO currently this logic is duplicated in linear_programming.py
|
72
|
+
targets = target_resource_utilization.get_restricted_targets()
|
73
|
+
if RUTarget.TOTAL in targets:
|
74
|
+
targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
|
75
|
+
self.ru_targets_to_compute = targets
|
76
|
+
|
71
77
|
self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
|
72
78
|
self.target_resource_utilization = target_resource_utilization
|
73
79
|
self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
|
74
80
|
self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
|
75
|
-
self.min_ru = self.ru_helper.compute_utilization(self.
|
76
|
-
self.non_conf_ru_dict = self.
|
81
|
+
self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
|
82
|
+
self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
|
77
83
|
|
78
84
|
self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
|
79
85
|
original_graph=self.original_graph)
|
@@ -111,18 +117,14 @@ class MixedPrecisionSearchManager:
|
|
111
117
|
def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
|
112
118
|
"""
|
113
119
|
Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
|
114
|
-
|
115
|
-
- Each row represents the set of resource utilization values for a specific resource utilization
|
116
|
-
measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
|
117
|
-
- Each entry in a specific column represents the resource utilization value of a given configuration
|
118
|
-
(single layer is configured with specific candidate, all other layer are at the minimal resource
|
119
|
-
utilization configuration) for the resource utilization measure of the respective row.
|
120
|
+
Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
|
120
121
|
|
121
122
|
Args:
|
122
123
|
target: The resource target for which the resource utilization is calculated (a RUTarget value).
|
123
124
|
|
124
|
-
Returns:
|
125
|
-
|
125
|
+
Returns:
|
126
|
+
A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
|
127
|
+
depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
|
126
128
|
"""
|
127
129
|
assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
|
128
130
|
|
@@ -132,21 +134,14 @@ class MixedPrecisionSearchManager:
|
|
132
134
|
for c, c_n in enumerate(configurable_sorted_nodes):
|
133
135
|
for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
|
134
136
|
if candidate_idx == self.min_ru_config[c]:
|
135
|
-
|
136
|
-
# always be 0 for all entries in the results vector.
|
137
|
-
candidate_rus = np.zeros(shape=self.min_ru[target].shape)
|
137
|
+
candidate_rus = self.min_ru[target]
|
138
138
|
else:
|
139
|
-
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
|
139
|
+
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
|
140
140
|
|
141
141
|
ru_matrix.append(np.asarray(candidate_rus))
|
142
142
|
|
143
|
-
|
144
|
-
|
145
|
-
# We only move the first axis (num of configurations) to be last,
|
146
|
-
# the remaining axes include the metric specific nodes (rows dimension of the new tensor)
|
147
|
-
# and the ru metric values (if they are non-scalars)
|
148
|
-
np_ru_matrix = np.array(ru_matrix)
|
149
|
-
return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
|
143
|
+
np_ru_matrix = np.array(ru_matrix) - self.min_ru[target] # num configurations X num elements
|
144
|
+
return np_ru_matrix
|
150
145
|
|
151
146
|
def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
|
152
147
|
"""
|
@@ -162,7 +157,6 @@ class MixedPrecisionSearchManager:
|
|
162
157
|
|
163
158
|
"""
|
164
159
|
cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
|
165
|
-
# TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
|
166
160
|
return self.ru_helper.compute_utilization({target}, cfg)[target]
|
167
161
|
|
168
162
|
@staticmethod
|
@@ -183,18 +177,6 @@ class MixedPrecisionSearchManager:
|
|
183
177
|
updated_cfg[idx] = value
|
184
178
|
return updated_cfg
|
185
179
|
|
186
|
-
def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
|
187
|
-
"""
|
188
|
-
Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
|
189
|
-
resource utilization targets.
|
190
|
-
|
191
|
-
Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
|
192
|
-
"""
|
193
|
-
ru_metrics = self.ru_metrics - {RUTarget.BOPS}
|
194
|
-
ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
|
195
|
-
ru[RUTarget.BOPS] = None
|
196
|
-
return ru
|
197
|
-
|
198
180
|
def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
|
199
181
|
"""
|
200
182
|
Computes the resource utilization values for a given mixed-precision configuration.
|
@@ -206,7 +188,7 @@ class MixedPrecisionSearchManager:
|
|
206
188
|
with the given config.
|
207
189
|
|
208
190
|
"""
|
209
|
-
act_qcs, w_qcs = self.ru_helper.
|
191
|
+
act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
|
210
192
|
ru = self.ru_helper.ru_calculator.compute_resource_utilization(
|
211
193
|
target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
|
212
194
|
w_qcs=w_qcs)
|
@@ -86,15 +86,31 @@ class ResourceUtilization:
|
|
86
86
|
ru.total_memory <= self.total_memory and \
|
87
87
|
ru.bops <= self.bops)
|
88
88
|
|
89
|
-
def
|
89
|
+
def get_restricted_targets(self) -> Set[RUTarget]:
|
90
90
|
d = self.get_resource_utilization_dict()
|
91
91
|
return {k for k, v in d.items() if v < np.inf}
|
92
92
|
|
93
93
|
def is_any_restricted(self) -> bool:
|
94
|
-
return bool(self.
|
94
|
+
return bool(self.get_restricted_targets())
|
95
95
|
|
96
|
-
def
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
96
|
+
def get_summary_str(self, restricted: bool):
|
97
|
+
"""
|
98
|
+
Generate summary string.
|
99
|
+
|
100
|
+
Args:
|
101
|
+
restricted: whether to include non-restricted targets.
|
102
|
+
|
103
|
+
Returns:
|
104
|
+
Summary string.
|
105
|
+
"""
|
106
|
+
targets = self.get_restricted_targets() if restricted else list(RUTarget)
|
107
|
+
summary = []
|
108
|
+
if RUTarget.WEIGHTS in targets:
|
109
|
+
summary.append(f"Weights memory: {self.weights_memory}")
|
110
|
+
if RUTarget.ACTIVATION in targets:
|
111
|
+
summary.append(f"Activation memory: {self.activation_memory}")
|
112
|
+
if RUTarget.TOTAL in targets:
|
113
|
+
summary.append(f"Total memory: {self.total_memory}")
|
114
|
+
if RUTarget.BOPS in targets:
|
115
|
+
summary.append(f"BOPS: {self.bops}")
|
116
|
+
return ', '.join(summary)
|
@@ -15,8 +15,7 @@
|
|
15
15
|
from collections import defaultdict
|
16
16
|
from copy import deepcopy
|
17
17
|
from enum import Enum, auto
|
18
|
-
from
|
19
|
-
from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence
|
18
|
+
from typing import Dict, NamedTuple, Optional, Tuple, List, Iterable, Union, Literal, Sequence, Set
|
20
19
|
|
21
20
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
22
21
|
from model_compression_toolkit.core import FrameworkInfo
|
@@ -88,7 +87,7 @@ class Utilization(NamedTuple):
|
|
88
87
|
# Needed for sum (with default start_value=0).
|
89
88
|
if other == 0:
|
90
89
|
return self
|
91
|
-
return self + other
|
90
|
+
return self + other # pragma: no cover
|
92
91
|
|
93
92
|
def __gt__(self, other: 'Utilization'):
|
94
93
|
# Needed for max. Compare by bytes.
|
@@ -96,7 +95,7 @@ class Utilization(NamedTuple):
|
|
96
95
|
|
97
96
|
def __lt__(self, other: 'Utilization'):
|
98
97
|
# Needed for min. Compare by bytes.
|
99
|
-
return self.bytes < other.bytes
|
98
|
+
return self.bytes < other.bytes # pragma: no cover
|
100
99
|
|
101
100
|
|
102
101
|
class ResourceUtilizationCalculator:
|
@@ -119,7 +118,21 @@ class ResourceUtilizationCalculator:
|
|
119
118
|
for n in graph.nodes:
|
120
119
|
self._act_tensors_size[n] = n.get_total_output_params()
|
121
120
|
self._params_cnt[n] = {k: v.size for k, v in n.weights.items()}
|
122
|
-
self._cuts = None
|
121
|
+
self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None
|
122
|
+
|
123
|
+
@property
|
124
|
+
def cuts(self) -> Dict[Cut, List[BaseNode]]:
|
125
|
+
""" Compute if needed and return graph cuts and their memory element nodes. """
|
126
|
+
if self._cuts is None:
|
127
|
+
memory_graph = MemoryGraph(deepcopy(self.graph))
|
128
|
+
_, _, cuts = compute_graph_max_cut(memory_graph)
|
129
|
+
if cuts is None: # pragma: no cover
|
130
|
+
raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover
|
131
|
+
cuts = [cut for cut in cuts if cut.mem_elements.elements]
|
132
|
+
# cache cuts nodes for future use, so do not filter by target
|
133
|
+
self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
|
134
|
+
for cut in cuts}
|
135
|
+
return self._cuts
|
123
136
|
|
124
137
|
def compute_resource_utilization(self,
|
125
138
|
target_criterion: TargetInclusionCriterion,
|
@@ -146,15 +159,16 @@ class ResourceUtilizationCalculator:
|
|
146
159
|
"""
|
147
160
|
ru_targets = set(ru_targets) if ru_targets else set(RUTarget)
|
148
161
|
|
162
|
+
if w_qcs is not None and not self.is_custom_weights_config_applicable(ru_targets):
|
163
|
+
raise ValueError('Weight configuration passed but no relevant metric requested.')
|
164
|
+
if act_qcs is not None and not self.is_custom_activation_config_applicable(ru_targets):
|
165
|
+
raise ValueError('Activation configuration passed but no relevant metric requested.')
|
166
|
+
|
149
167
|
w_total, a_total = None, None
|
150
168
|
if {RUTarget.WEIGHTS, RUTarget.TOTAL}.intersection(ru_targets):
|
151
169
|
w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs)
|
152
|
-
elif w_qcs is not None: # pragma: no cover
|
153
|
-
raise ValueError('Weight configuration passed but no relevant metric requested.')
|
154
170
|
|
155
|
-
if
|
156
|
-
raise ValueError('Activation configuration passed but no relevant metric requested.')
|
157
|
-
if RUTarget.ACTIVATION in ru_targets:
|
171
|
+
if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
|
158
172
|
a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
|
159
173
|
|
160
174
|
ru = ResourceUtilization()
|
@@ -163,14 +177,12 @@ class ResourceUtilizationCalculator:
|
|
163
177
|
if RUTarget.ACTIVATION in ru_targets:
|
164
178
|
ru.activation_memory = a_total
|
165
179
|
if RUTarget.TOTAL in ru_targets:
|
166
|
-
|
167
|
-
act_tensors_total, *_ = self.compute_activation_tensors_utilization(target_criterion, bitwidth_mode, act_qcs)
|
168
|
-
ru.total_memory = w_total + act_tensors_total
|
180
|
+
ru.total_memory = w_total + a_total
|
169
181
|
if RUTarget.BOPS in ru_targets:
|
170
182
|
ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
|
171
183
|
bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)
|
172
184
|
|
173
|
-
assert ru.
|
185
|
+
assert ru.get_restricted_targets() == set(ru_targets), 'Mismatch between the number of requested and computed metrics'
|
174
186
|
return ru
|
175
187
|
|
176
188
|
def compute_weights_utilization(self,
|
@@ -262,12 +274,12 @@ class ResourceUtilizationCalculator:
|
|
262
274
|
Returns:
|
263
275
|
Total activation utilization of the network.
|
264
276
|
"""
|
265
|
-
return self.
|
277
|
+
return self.compute_activation_utilization_by_cut(target_criterion, bitwidth_mode, act_qcs)[0]
|
266
278
|
|
267
|
-
def
|
268
|
-
|
269
|
-
|
270
|
-
|
279
|
+
def compute_activation_utilization_by_cut(self,
|
280
|
+
target_criterion: TargetInclusionCriterion,
|
281
|
+
bitwidth_mode: BitwidthMode,
|
282
|
+
act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
|
271
283
|
-> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
|
272
284
|
"""
|
273
285
|
Compute graph activation cuts utilization.
|
@@ -292,20 +304,10 @@ class ResourceUtilizationCalculator:
|
|
292
304
|
if not graph_target_nodes:
|
293
305
|
return 0, {}, {}
|
294
306
|
|
295
|
-
if self._cuts is None:
|
296
|
-
memory_graph = MemoryGraph(deepcopy(self.graph))
|
297
|
-
_, _, cuts = compute_graph_max_cut(memory_graph)
|
298
|
-
if cuts is None: # pragma: no cover
|
299
|
-
raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover
|
300
|
-
cuts = [cut for cut in cuts if cut.mem_elements.elements]
|
301
|
-
# cache cuts nodes for future use, so do not filter by target
|
302
|
-
self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
|
303
|
-
for cut in cuts}
|
304
|
-
|
305
307
|
util_per_cut: Dict[Cut, Utilization] = {} # type: ignore
|
306
308
|
util_per_cut_per_node = defaultdict(dict)
|
307
|
-
for cut in self.
|
308
|
-
cut_target_nodes =
|
309
|
+
for cut in self.cuts:
|
310
|
+
cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
|
309
311
|
if not cut_target_nodes:
|
310
312
|
continue
|
311
313
|
for n in cut_target_nodes:
|
@@ -322,7 +324,7 @@ class ResourceUtilizationCalculator:
|
|
322
324
|
bitwidth_mode: BitwidthMode,
|
323
325
|
act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
|
324
326
|
include_reused=False) \
|
325
|
-
-> Tuple[float, Dict[BaseNode, Utilization]]:
|
327
|
+
-> Tuple[float, Dict[BaseNode, Utilization]]: # pragma: no cover
|
326
328
|
"""
|
327
329
|
Compute resource utilization for graph's activations tensors.
|
328
330
|
|
@@ -462,7 +464,14 @@ class ResourceUtilizationCalculator:
|
|
462
464
|
node_bops = a_nbits * w_nbits * node_mac
|
463
465
|
return node_bops
|
464
466
|
|
465
|
-
|
467
|
+
def is_custom_weights_config_applicable(self, ru_targets: Set[RUTarget]) -> bool:
    """
    Check whether a custom weights configuration is relevant for the requested targets.

    Args:
        ru_targets: Resource utilization targets to check.

    Returns:
        True if at least one of the WEIGHTS, TOTAL or BOPS targets is requested, False otherwise.
    """
    weights_dependent_targets = {RUTarget.WEIGHTS, RUTarget.TOTAL, RUTarget.BOPS}
    return not weights_dependent_targets.isdisjoint(ru_targets)
|
470
|
+
|
471
|
+
def is_custom_activation_config_applicable(self, ru_targets: Set[RUTarget]) -> bool:
    """
    Check whether a custom activation configuration is relevant for the requested targets.

    Args:
        ru_targets: Resource utilization targets to check.

    Returns:
        True if at least one of the ACTIVATION, TOTAL or BOPS targets is requested, False otherwise.
    """
    activation_dependent_targets = {RUTarget.ACTIVATION, RUTarget.TOTAL, RUTarget.BOPS}
    return not activation_dependent_targets.isdisjoint(ru_targets)
|
474
|
+
|
466
475
|
def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]:
    """
    Select the nodes of a cut that match the given inclusion criterion.

    Args:
        cut: The cut whose nodes are looked up in self.cuts.
        target_criterion: Criterion by which the cut's nodes are filtered.

    Returns:
        A list of target nodes from a cut.
    """
    # Reused nodes are always included when filtering the nodes of a cut.
    return self._get_target_activation_nodes(target_criterion, include_reused=True, nodes=self.cuts[cut])
|
479
488
|
|
480
489
|
def _get_target_weight_nodes(self,
|
@@ -500,7 +509,7 @@ class ResourceUtilizationCalculator:
|
|
500
509
|
quantized = [n for n in self.graph if n.has_any_weight_attr_to_quantize()]
|
501
510
|
configurable = self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused)
|
502
511
|
nodes = [n for n in quantized if n not in configurable]
|
503
|
-
elif target_criterion == TargetInclusionCriterion.Any:
|
512
|
+
elif target_criterion == TargetInclusionCriterion.Any: # pragma: no cover
|
504
513
|
nodes = list(self.graph.nodes)
|
505
514
|
else: # pragma: no cover
|
506
515
|
raise ValueError(f'Unknown {target_criterion}.')
|
@@ -566,15 +575,15 @@ class ResourceUtilizationCalculator:
|
|
566
575
|
Selected nodes.
|
567
576
|
"""
|
568
577
|
nodes = nodes or self.graph.nodes
|
569
|
-
if target_criterion == TargetInclusionCriterion.QConfigurable:
|
578
|
+
if target_criterion == TargetInclusionCriterion.QConfigurable: # pragma: no cover
|
570
579
|
nodes = [n for n in nodes if n.has_configurable_activation()]
|
571
580
|
elif target_criterion == TargetInclusionCriterion.AnyQuantized:
|
572
581
|
nodes = [n for n in nodes if n.is_activation_quantization_enabled()]
|
573
|
-
elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
|
582
|
+
elif target_criterion == TargetInclusionCriterion.QNonConfigurable: # pragma: no cover
|
574
583
|
nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
|
575
584
|
elif target_criterion != TargetInclusionCriterion.Any: # pragma: no cover
|
576
585
|
raise ValueError(f'Unknown {target_criterion}.')
|
577
|
-
if not include_reused:
|
586
|
+
if not include_reused: # pragma: no cover
|
578
587
|
nodes = [n for n in nodes if not n.reuse]
|
579
588
|
return nodes
|
580
589
|
|
@@ -664,4 +673,4 @@ class ResourceUtilizationCalculator:
|
|
664
673
|
f'as it {len(w_qcs)}!=1 unique candidates.')
|
665
674
|
return w_qcs[0].weights_n_bits
|
666
675
|
|
667
|
-
raise ValueError(f'Unknown mode {bitwidth_mode.name}')
|
676
|
+
raise ValueError(f'Unknown mode {bitwidth_mode.name}') # pragma: no cover
|
@@ -118,7 +118,7 @@ def requires_mixed_precision(in_model: Any,
|
|
118
118
|
|
119
119
|
ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
|
120
120
|
max_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QMaxBit,
|
121
|
-
ru_targets=target_resource_utilization.
|
121
|
+
ru_targets=target_resource_utilization.get_restricted_targets())
|
122
122
|
return not target_resource_utilization.is_satisfied_by(max_ru)
|
123
123
|
|
124
124
|
|
@@ -16,7 +16,7 @@
|
|
16
16
|
import numpy as np
|
17
17
|
from pulp import *
|
18
18
|
from tqdm import tqdm
|
19
|
-
from typing import Dict, Tuple
|
19
|
+
from typing import Dict, Tuple, Set, Any
|
20
20
|
|
21
21
|
from model_compression_toolkit.logger import Logger
|
22
22
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
|
@@ -167,95 +167,95 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
|
|
167
167
|
indicators_arr = np.array(indicators)
|
168
168
|
indicators_matrix = np.diag(indicators_arr)
|
169
169
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
target=target,
|
176
|
-
target_resource_utilization_value=ru_value,
|
177
|
-
indicators_matrix=indicators_matrix,
|
178
|
-
lp_problem=lp_problem,
|
179
|
-
non_conf_ru_vector=non_conf_ru_vector)
|
170
|
+
_add_ru_constraints(search_manager=search_manager,
|
171
|
+
target_resource_utilization=target_resource_utilization,
|
172
|
+
indicators_matrix=indicators_matrix,
|
173
|
+
lp_problem=lp_problem,
|
174
|
+
non_conf_ru_dict=search_manager.non_conf_ru_dict)
|
180
175
|
else: # pragma: no cover
|
181
176
|
Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
|
182
177
|
"A valid 'target_resource_utilization' is required.")
|
183
178
|
return lp_problem
|
184
179
|
|
185
180
|
|
186
|
-
def
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
non_conf_ru_vector: np.ndarray):
|
181
|
+
def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
                        target_resource_utilization: ResourceUtilization,
                        indicators_matrix: np.ndarray,
                        lp_problem: LpProblem,
                        non_conf_ru_dict: Optional[Dict[RUTarget, np.ndarray]]):
    """
    Adding targets constraints for the Lp problem for the given target resource utilization.
    The update to the Lp problem object is done inplace.

    Args:
        search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
        target_resource_utilization: Target resource utilization.
        indicators_matrix: A diagonal matrix of the Lp problem's indicators.
        lp_problem: An Lp problem object to add constraint to.
        non_conf_ru_dict: Non-configurable nodes' resource utilization vectors for the constrained targets.
            May be None, in which case no non-configurable utilization is added.
    """
    ru_indicated_vectors = {}
    # targets to add constraints for
    constraints_targets = target_resource_utilization.get_restricted_targets()
    # to add constraints for Total target we need to compute weight and activation
    targets_to_compute = constraints_targets
    if RUTarget.TOTAL in constraints_targets:
        targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}

    for target in targets_to_compute:
        # NOTE(review): the transpose below suggests ru_matrix is laid out as
        # (num configurations X num elements) - confirm against compute_resource_utilization_matrix.
        ru_matrix = search_manager.compute_resource_utilization_matrix(target)
        indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)  # num elements X num configurations

        # Sum the indicated values over all configurations, and add the value for minimal configuration once.
        # Indicated utilization values are relative to the minimal configuration (i.e. they represent the extra
        # memory that would be required if that configuration is selected).
        # Each element in a vector is an lp object representing the configurations sum term for a memory element.
        ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]

        # The dict is declared Optional, so it must not be subscripted when it is None.
        non_conf_ru_vec = None if non_conf_ru_dict is None else non_conf_ru_dict[target]
        if non_conf_ru_vec is not None and non_conf_ru_vec.size:
            # add non-conf value as additional mem elements so that they get aggregated
            ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
        ru_indicated_vectors[target] = ru_vec

    # The requested values do not change while constraints are added, so fetch them once.
    target_ru_values = target_resource_utilization.get_resource_utilization_dict()

    # add constraints only for the restricted targets in target resource utilization.
    for target in constraints_targets:
        target_resource_utilization_value = target_ru_values[target]
        aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
        for v in aggr_ru:
            if isinstance(v, float):
                # A plain float does not depend on any indicator, so it cannot be added as an Lp constraint;
                # it is only checked against the target value.
                if v > target_resource_utilization_value:
                    Logger.critical(
                        f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
                        f"with the value {target_resource_utilization_value}.")  # pragma: no cover
            else:
                lp_problem += v <= target_resource_utilization_value
|
233
|
+
|
206
234
|
|
207
|
-
|
208
|
-
indicated_ru_matrix = np.matmul(ru_matrix, indicators_matrix)
|
209
|
-
# Need to re-organize the tensor such that the configurations' axis will be second,
|
210
|
-
# and all metric values' axis will come afterword
|
211
|
-
indicated_ru_matrix = np.moveaxis(indicated_ru_matrix, source=len(indicated_ru_matrix.shape) - 1, destination=1)
|
212
|
-
|
213
|
-
# In order to get the result resource utilization according to a chosen set of indicators, we sum each row in
|
214
|
-
# the result matrix. Each row represents the resource utilization values for a specific resource utilization metric,
|
215
|
-
# such that only elements corresponding to a configuration which implied by the set of indicators will have some
|
216
|
-
# positive value different than 0 (and will contribute to the total resource utilization).
|
217
|
-
ru_sum_vector = np.array([
|
218
|
-
np.sum(indicated_ru_matrix[i], axis=0) + # sum of metric values over all configurations in a row
|
219
|
-
search_manager.min_ru[target][i] for i in range(indicated_ru_matrix.shape[0])])
|
220
|
-
|
221
|
-
ru_vec = ru_sum_vector
|
222
|
-
if non_conf_ru_vector is not None and non_conf_ru_vector.size:
|
223
|
-
ru_vec = np.concatenate([ru_vec, non_conf_ru_vector])
|
224
|
-
|
225
|
-
aggr_ru = _aggregate_for_lp(ru_vec, target)
|
226
|
-
for v in aggr_ru:
|
227
|
-
if isinstance(v, float):
|
228
|
-
if v > target_resource_utilization_value:
|
229
|
-
Logger.critical(
|
230
|
-
f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
|
231
|
-
f"with the value {target_resource_utilization_value}.") # pragma: no cover
|
232
|
-
else:
|
233
|
-
lp_problem += v <= target_resource_utilization_value
|
234
|
-
|
235
|
-
|
236
|
-
def _aggregate_for_lp(ru_vec, target: RUTarget) -> list:
|
235
|
+
def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
    """
    Build the list of aggregated resource utilization terms to be constrained in the LP.

    Args:
        targets_ru_vec: resource utilization vectors for all precomputed targets.
        target: resource utilization target.

    Returns:
        Aggregated resource utilization, one list entry per constraint.
    """
    if target == RUTarget.TOTAL:
        # total = sum of all weights terms added to each activation term separately
        weights_sum = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
        return [weights_sum + act_term for act_term in targets_ru_vec[RUTarget.ACTIVATION]]

    if target == RUTarget.ACTIVATION:
        # for max aggregation, each value constitutes a separate constraint
        return list(targets_ru_vec[target])

    if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
        # sum aggregation yields a single constraint term
        return [lpSum(targets_ru_vec[target])]

    raise ValueError(f'Unexpected target {target}.')  # pragma: no cover
|
259
259
|
|
260
260
|
|
261
261
|
def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
|
@@ -14,7 +14,7 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
|
16
16
|
import copy
|
17
|
-
from typing import Callable, Any, List
|
17
|
+
from typing import Callable, Any, List, Optional
|
18
18
|
|
19
19
|
from model_compression_toolkit.core.common import FrameworkInfo
|
20
20
|
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
@@ -170,6 +170,7 @@ def core_runner(in_model: Any,
|
|
170
170
|
|
171
171
|
_set_final_resource_utilization(graph=tg,
|
172
172
|
final_bit_widths_config=bit_widths_config,
|
173
|
+
target_resource_utilization=target_resource_utilization,
|
173
174
|
fw_info=fw_info,
|
174
175
|
fw_impl=fw_impl)
|
175
176
|
|
@@ -207,6 +208,7 @@ def core_runner(in_model: Any,
|
|
207
208
|
|
208
209
|
def _set_final_resource_utilization(graph: Graph,
                                    final_bit_widths_config: List[int],
                                    target_resource_utilization: Optional[ResourceUtilization],
                                    fw_info: FrameworkInfo,
                                    fw_impl: FrameworkImplementation):
    """
    Compute the resource utilization of the graph under its final quantization configuration for the
    restricted targets, and store it in the graph's user info together with the final bit-width configuration.
    If there are no restricted targets (or no target resource utilization was passed), the stored
    resource utilization is None.

    Args:
        graph: Graph to compute the resource utilization for.
        final_bit_widths_config: The final bit-width configuration to quantize the model accordingly.
        target_resource_utilization: Requested target resource utilization if relevant.
        fw_info: A FrameworkInfo object.
        fw_impl: FrameworkImplementation object with specific framework methods implementation.

    """
    # The target is optional, so it must not be dereferenced when it is None.
    ru_targets = (target_resource_utilization.get_restricted_targets()
                  if target_resource_utilization is not None else None)
    final_ru = None
    if ru_targets:
        ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
        # Custom configurations are passed only for the kinds the requested targets depend on.
        w_qcs, a_qcs = None, None
        if ru_calculator.is_custom_weights_config_applicable(ru_targets):
            w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes}
        if ru_calculator.is_custom_activation_config_applicable(ru_targets):
            a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes}
        final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized,
                                                              BitwidthMode.QCustom,
                                                              act_qcs=a_qcs, w_qcs=w_qcs, ru_targets=ru_targets)
        summary = final_ru.get_summary_str(restricted=True)
        Logger.info(f'Resource utilization for quantized mixed-precision targets:\n {summary}.')
    graph.user_info.final_resource_utilization = final_ru
    graph.user_info.mixed_precision_cfg = final_bit_widths_config
|
File without changes
|
{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.161150.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|