mct-nightly 2.2.0.20250114.84821__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +10 -10
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/{resource_utilization_tools/ru_methods.py → mixed_precision_ru_helper.py} +35 -70
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +20 -38
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +35 -34
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +59 -59
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: mct-nightly
|
3
|
-
Version: 2.2.0.20250114.84821
|
3
|
+
Version: 2.2.0.20250114.134534
|
4
4
|
Summary: A Model Compression Toolkit for neural networks
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
6
6
|
Classifier: License :: OSI Approved :: Apache Software License
|
{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
model_compression_toolkit/__init__.py,sha256=dBTcbUHy3iim5N_8DIYbA_lq8Kp7tPhl7FelHVRgJRo,1557
|
2
2
|
model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
|
3
3
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
4
4
|
model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
|
@@ -65,18 +65,18 @@ model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_uti
|
|
65
65
|
model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
|
66
66
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=AkKBP5Dm7iwz7qs5WKDB7Bm8Os-jXaMVnlkyrlw4iRY,4603
|
67
67
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
|
68
|
+
model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=7iJ2YprFvm2Dk9EkXYrwO7-Sf89f537D-KrQP7XhvPs,8889
|
68
69
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=aAuGIzSDKIDiq07nheeWRXLEatzr6Fvoa5ZHv-2BtCI,7130
|
69
|
-
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=
|
70
|
+
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=7ROKH1bTQEoyl5yLj10NbOWLFJgJicHBBJmUT_s1xnw,32463
|
70
71
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=gsigifJ-ykWNafF4t7UMEC_-nd6YPERAk1_z0kT-Y88,27172
|
71
72
|
model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
|
72
73
|
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=UWgxzhKWFOoESLq0TFVz0M1PhkU9d9n6wccSA3RgUxk,7903
|
73
74
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
74
75
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=89gXow5VMOsQX0SxLLoVvVDDxQd1z9b6crEWZgeWSaY,3453
|
75
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=
|
76
|
+
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=jvPhYhzGMKKgoqkEBMeDcOiM8wHdHxn_hM1RVFgvERw,34262
|
76
77
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=5sbFXgDA6mpkXXAmk6HmR5UvBnuAmkoqTHu3ah6npsY,8529
|
77
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=6LT3rZo9SlDupO-P22oG7f4sAgF_i1IYz5JxQQuMElU,10841
|
78
78
|
model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
|
79
|
-
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=
|
79
|
+
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=0NUmtGiAOLl3GObr6V5L6GU19fXmp89GKDlKAKZkxwU,17176
|
80
80
|
model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
|
81
81
|
model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
|
82
82
|
model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
|
@@ -523,8 +523,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
|
|
523
523
|
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
|
524
524
|
model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
|
525
525
|
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
|
526
|
-
mct_nightly-2.2.0.20250114.84821.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
527
|
-
mct_nightly-2.2.0.20250114.
|
528
|
-
mct_nightly-2.2.0.20250114.84821.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
529
|
-
mct_nightly-2.2.0.20250114.84821.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
530
|
-
mct_nightly-2.2.0.20250114.84821.dist-info/RECORD,,
|
526
|
+
mct_nightly-2.2.0.20250114.134534.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
527
|
+
mct_nightly-2.2.0.20250114.134534.dist-info/METADATA,sha256=quvuXUrjOH_pIW_pD6rxY0fFwE7NxpduT0u1P1eolbk,26604
|
528
|
+
mct_nightly-2.2.0.20250114.134534.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
529
|
+
mct_nightly-2.2.0.20250114.134534.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
530
|
+
mct_nightly-2.2.0.20250114.134534.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.2.0.20250114.84821"
|
30
|
+
__version__ = "2.2.0.20250114.134534"
|
@@ -12,14 +12,13 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
-
from typing import List, Set, Dict, Optional, Tuple
|
15
|
+
from typing import List, Set, Dict, Optional, Tuple, Any
|
16
16
|
|
17
17
|
import numpy as np
|
18
18
|
|
19
19
|
from model_compression_toolkit.core import FrameworkInfo
|
20
20
|
from model_compression_toolkit.core.common import Graph, BaseNode
|
21
21
|
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
22
|
-
from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
|
23
22
|
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
|
24
23
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
|
25
24
|
RUTarget
|
@@ -44,9 +43,8 @@ class MixedPrecisionRUHelper:
|
|
44
43
|
def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
|
45
44
|
"""
|
46
45
|
Compute utilization of requested targets for a specific configuration in the format expected by LP problem
|
47
|
-
formulation
|
48
|
-
|
49
|
-
consistent between configurations).
|
46
|
+
formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
|
47
|
+
(between calls).
|
50
48
|
|
51
49
|
Args:
|
52
50
|
ru_targets: resource utilization targets to compute.
|
@@ -57,33 +55,26 @@ class MixedPrecisionRUHelper:
|
|
57
55
|
"""
|
58
56
|
|
59
57
|
ru = {}
|
60
|
-
|
61
|
-
act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
|
62
|
-
w_util = None
|
58
|
+
act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
|
63
59
|
if RUTarget.WEIGHTS in ru_targets:
|
64
|
-
|
65
|
-
ru[RUTarget.WEIGHTS] = np.array(list(
|
60
|
+
wu = self._weights_utilization(w_qcs)
|
61
|
+
ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
|
66
62
|
|
67
|
-
# TODO make mp agnostic to activation method
|
68
63
|
if RUTarget.ACTIVATION in ru_targets:
|
69
|
-
|
70
|
-
ru[RUTarget.ACTIVATION] = np.array(list(
|
71
|
-
|
72
|
-
# TODO use maxcut
|
73
|
-
if RUTarget.TOTAL in ru_targets:
|
74
|
-
act_tensors_util = self._activation_tensor_utilization(act_qcs)
|
75
|
-
w_util = w_util or self._weights_utilization(w_qcs)
|
76
|
-
total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
|
77
|
-
# for n in self.graph.nodes if n in act_tensors_util or n in w_util}
|
78
|
-
for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
|
79
|
-
ru[RUTarget.TOTAL] = np.array(list(total.values()))
|
64
|
+
au = self._activation_utilization(act_qcs)
|
65
|
+
ru[RUTarget.ACTIVATION] = np.array(list(au.values()))
|
80
66
|
|
81
67
|
if RUTarget.BOPS in ru_targets:
|
82
68
|
ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
|
83
69
|
|
70
|
+
if RUTarget.TOTAL in ru_targets:
|
71
|
+
raise ValueError('Total target should be computed based on weights and activations targets.')
|
72
|
+
|
73
|
+
assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
|
74
|
+
f'Requested {ru_targets}')
|
84
75
|
return ru
|
85
76
|
|
86
|
-
def get_configurable_qcs(self, mp_cfg) \
|
77
|
+
def get_quantization_candidates(self, mp_cfg) \
|
87
78
|
-> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
|
88
79
|
"""
|
89
80
|
Retrieve quantization candidates objects for weights and activations from the configuration list.
|
@@ -92,15 +83,13 @@ class MixedPrecisionRUHelper:
|
|
92
83
|
mp_cfg: a list of candidates indices for configurable layers.
|
93
84
|
|
94
85
|
Returns:
|
95
|
-
|
86
|
+
A mapping between nodes to weights quantization config, and a mapping between nodes and activation
|
96
87
|
quantization config.
|
97
88
|
"""
|
98
89
|
mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
|
99
90
|
node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
|
100
|
-
act_qcs = {n: node_qcs
|
101
|
-
|
102
|
-
w_qcs = {n: node_qcs[n].weights_quantization_cfg
|
103
|
-
for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
|
91
|
+
act_qcs = {n: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
|
92
|
+
w_qcs = {n: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
|
104
93
|
return act_qcs, w_qcs
|
105
94
|
|
106
95
|
def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
|
@@ -127,8 +116,8 @@ class MixedPrecisionRUHelper:
|
|
127
116
|
nodes_util = {n: u.bytes for n, u in nodes_util.items()}
|
128
117
|
return nodes_util
|
129
118
|
|
130
|
-
def
|
131
|
-
-> Optional[Dict[
|
119
|
+
def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
|
120
|
+
-> Optional[Dict[Any, float]]:
|
132
121
|
"""
|
133
122
|
Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
|
134
123
|
|
@@ -138,41 +127,17 @@ class MixedPrecisionRUHelper:
|
|
138
127
|
Returns:
|
139
128
|
Activation utilization per cut, or empty dict if no configuration was passed.
|
140
129
|
"""
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
|
146
|
-
return cuts_util
|
147
|
-
|
148
|
-
# Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
|
149
|
-
# configurable nodes.
|
150
|
-
return {}
|
151
|
-
|
152
|
-
def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
|
153
|
-
"""
|
154
|
-
Compute activation tensors utilization fo configurable nodes if configuration is passed or
|
155
|
-
for non-configurable nodes otherwise.
|
130
|
+
# Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
|
131
|
+
# covered by the computation of configurable activations.
|
132
|
+
if not act_qcs:
|
133
|
+
return {}
|
156
134
|
|
157
|
-
|
158
|
-
|
135
|
+
_, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut(
|
136
|
+
TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs)
|
137
|
+
cuts_util = {c: u.bytes for c, u in cuts_util.items()}
|
138
|
+
return cuts_util
|
159
139
|
|
160
|
-
|
161
|
-
Activation utilization per node.
|
162
|
-
"""
|
163
|
-
if act_qcs:
|
164
|
-
target_criterion = TargetInclusionCriterion.QConfigurable
|
165
|
-
bitwidth_mode = BitwidthMode.QCustom
|
166
|
-
else:
|
167
|
-
target_criterion = TargetInclusionCriterion.QNonConfigurable
|
168
|
-
bitwidth_mode = BitwidthMode.QDefaultSP
|
169
|
-
|
170
|
-
_, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
|
171
|
-
bitwidth_mode=bitwidth_mode,
|
172
|
-
act_qcs=act_qcs)
|
173
|
-
return {n: u.bytes for n, u in nodes_util.items()}
|
174
|
-
|
175
|
-
def _bops_utilization(self, mp_cfg: List[int]):
|
140
|
+
def _bops_utilization(self, mp_cfg: List[int]) -> np.ndarray:
|
176
141
|
"""
|
177
142
|
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
|
178
143
|
according to the given mixed-precision configuration of a virtual graph with composed nodes.
|
@@ -180,15 +145,15 @@ class MixedPrecisionRUHelper:
|
|
180
145
|
Args:
|
181
146
|
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
|
182
147
|
|
183
|
-
Returns:
|
184
|
-
|
185
|
-
|
148
|
+
Returns:
|
149
|
+
A vector of node's BOPS count.
|
186
150
|
"""
|
187
|
-
#
|
188
|
-
|
189
|
-
|
190
|
-
|
151
|
+
# bops is computed for all nodes, so non-configurable memory is already covered by the computation of
|
152
|
+
# configurable nodes
|
153
|
+
if not mp_cfg:
|
154
|
+
return np.array([])
|
191
155
|
|
156
|
+
# TODO keeping old implementation for now
|
192
157
|
virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
|
193
158
|
|
194
159
|
mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)
|
@@ -26,8 +26,8 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
|
|
26
26
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
|
27
27
|
RUTarget, ResourceUtilization
|
28
28
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
|
29
|
-
|
30
|
-
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
|
29
|
+
TargetInclusionCriterion, BitwidthMode
|
30
|
+
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
|
31
31
|
MixedPrecisionRUHelper
|
32
32
|
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
|
33
33
|
from model_compression_toolkit.logger import Logger
|
@@ -67,13 +67,19 @@ class MixedPrecisionSearchManager:
|
|
67
67
|
self.compute_metric_fn = self.get_sensitivity_metric()
|
68
68
|
self._cuts = None
|
69
69
|
|
70
|
-
|
70
|
+
# To define RU Total constraints we need to compute weights and activations even if they have no constraints
|
71
|
+
# TODO currently this logic is duplicated in linear_programming.py
|
72
|
+
targets = target_resource_utilization.get_restricted_metrics()
|
73
|
+
if RUTarget.TOTAL in targets:
|
74
|
+
targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
|
75
|
+
self.ru_targets_to_compute = targets
|
76
|
+
|
71
77
|
self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
|
72
78
|
self.target_resource_utilization = target_resource_utilization
|
73
79
|
self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
|
74
80
|
self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
|
75
|
-
self.min_ru = self.ru_helper.compute_utilization(self.ru_metrics, self.min_ru_config)
|
76
|
-
self.non_conf_ru_dict = self._non_configurable_nodes_ru()
|
81
|
+
self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
|
82
|
+
self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
|
77
83
|
|
78
84
|
self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
|
79
85
|
original_graph=self.original_graph)
|
@@ -111,18 +117,14 @@ class MixedPrecisionSearchManager:
|
|
111
117
|
def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
|
112
118
|
"""
|
113
119
|
Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
|
114
|
-
|
115
|
-
- Each row represents the set of resource utilization values for a specific resource utilization
|
116
|
-
measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
|
117
|
-
- Each entry in a specific column represents the resource utilization value of a given configuration
|
118
|
-
(single layer is configured with specific candidate, all other layer are at the minimal resource
|
119
|
-
utilization configuration) for the resource utilization measure of the respective row.
|
120
|
+
Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
|
120
121
|
|
121
122
|
Args:
|
122
123
|
target: The resource target for which the resource utilization is calculated (a RUTarget value).
|
123
124
|
|
124
|
-
Returns:
|
125
|
-
|
125
|
+
Returns:
|
126
|
+
A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
|
127
|
+
depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
|
126
128
|
"""
|
127
129
|
assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
|
128
130
|
|
@@ -132,21 +134,14 @@ class MixedPrecisionSearchManager:
|
|
132
134
|
for c, c_n in enumerate(configurable_sorted_nodes):
|
133
135
|
for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
|
134
136
|
if candidate_idx == self.min_ru_config[c]:
|
135
|
-
|
136
|
-
# always be 0 for all entries in the results vector.
|
137
|
-
candidate_rus = np.zeros(shape=self.min_ru[target].shape)
|
137
|
+
candidate_rus = self.min_ru[target]
|
138
138
|
else:
|
139
|
-
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
|
139
|
+
candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
|
140
140
|
|
141
141
|
ru_matrix.append(np.asarray(candidate_rus))
|
142
142
|
|
143
|
-
|
144
|
-
|
145
|
-
# We only move the first axis (num of configurations) to be last,
|
146
|
-
# the remaining axes include the metric specific nodes (rows dimension of the new tensor)
|
147
|
-
# and the ru metric values (if they are non-scalars)
|
148
|
-
np_ru_matrix = np.array(ru_matrix)
|
149
|
-
return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
|
143
|
+
np_ru_matrix = np.array(ru_matrix) - self.min_ru[target] # num configurations X num elements
|
144
|
+
return np_ru_matrix
|
150
145
|
|
151
146
|
def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
|
152
147
|
"""
|
@@ -162,7 +157,6 @@ class MixedPrecisionSearchManager:
|
|
162
157
|
|
163
158
|
"""
|
164
159
|
cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
|
165
|
-
# TODO compute for all targets at once. Currently the way up to add_set_of_ru_constraints is per target.
|
166
160
|
return self.ru_helper.compute_utilization({target}, cfg)[target]
|
167
161
|
|
168
162
|
@staticmethod
|
@@ -183,18 +177,6 @@ class MixedPrecisionSearchManager:
|
|
183
177
|
updated_cfg[idx] = value
|
184
178
|
return updated_cfg
|
185
179
|
|
186
|
-
def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
|
187
|
-
"""
|
188
|
-
Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
|
189
|
-
resource utilization targets.
|
190
|
-
|
191
|
-
Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
|
192
|
-
"""
|
193
|
-
ru_metrics = self.ru_metrics - {RUTarget.BOPS}
|
194
|
-
ru = self.ru_helper.compute_utilization(ru_targets=ru_metrics, mp_cfg=None)
|
195
|
-
ru[RUTarget.BOPS] = None
|
196
|
-
return ru
|
197
|
-
|
198
180
|
def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
|
199
181
|
"""
|
200
182
|
Computes the resource utilization values for a given mixed-precision configuration.
|
@@ -206,7 +188,7 @@ class MixedPrecisionSearchManager:
|
|
206
188
|
with the given config.
|
207
189
|
|
208
190
|
"""
|
209
|
-
act_qcs, w_qcs = self.ru_helper.get_configurable_qcs(config)
|
191
|
+
act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
|
210
192
|
ru = self.ru_helper.ru_calculator.compute_resource_utilization(
|
211
193
|
target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
|
212
194
|
w_qcs=w_qcs)
|
@@ -88,7 +88,7 @@ class Utilization(NamedTuple):
|
|
88
88
|
# Needed for sum (with default start_value=0).
|
89
89
|
if other == 0:
|
90
90
|
return self
|
91
|
-
return self + other
|
91
|
+
return self + other # pragma: no cover
|
92
92
|
|
93
93
|
def __gt__(self, other: 'Utilization'):
|
94
94
|
# Needed for max. Compare by bytes.
|
@@ -96,7 +96,7 @@ class Utilization(NamedTuple):
|
|
96
96
|
|
97
97
|
def __lt__(self, other: 'Utilization'):
|
98
98
|
# Needed for min. Compare by bytes.
|
99
|
-
return self.bytes < other.bytes
|
99
|
+
return self.bytes < other.bytes # pragma: no cover
|
100
100
|
|
101
101
|
|
102
102
|
class ResourceUtilizationCalculator:
|
@@ -119,7 +119,21 @@ class ResourceUtilizationCalculator:
|
|
119
119
|
for n in graph.nodes:
|
120
120
|
self._act_tensors_size[n] = n.get_total_output_params()
|
121
121
|
self._params_cnt[n] = {k: v.size for k, v in n.weights.items()}
|
122
|
-
self._cuts = None
|
122
|
+
self._cuts: Optional[Dict[Cut, List[BaseNode]]] = None
|
123
|
+
|
124
|
+
@property
|
125
|
+
def cuts(self) -> Dict[Cut, List[BaseNode]]:
|
126
|
+
""" Compute if needed and return graph cuts and their memory element nodes. """
|
127
|
+
if self._cuts is None:
|
128
|
+
memory_graph = MemoryGraph(deepcopy(self.graph))
|
129
|
+
_, _, cuts = compute_graph_max_cut(memory_graph)
|
130
|
+
if cuts is None: # pragma: no cover
|
131
|
+
raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover
|
132
|
+
cuts = [cut for cut in cuts if cut.mem_elements.elements]
|
133
|
+
# cache cuts nodes for future use, so do not filter by target
|
134
|
+
self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
|
135
|
+
for cut in cuts}
|
136
|
+
return self._cuts
|
123
137
|
|
124
138
|
def compute_resource_utilization(self,
|
125
139
|
target_criterion: TargetInclusionCriterion,
|
@@ -152,10 +166,10 @@ class ResourceUtilizationCalculator:
|
|
152
166
|
elif w_qcs is not None: # pragma: no cover
|
153
167
|
raise ValueError('Weight configuration passed but no relevant metric requested.')
|
154
168
|
|
155
|
-
if
|
156
|
-
raise ValueError('Activation configuration passed but no relevant metric requested.')
|
157
|
-
if RUTarget.ACTIVATION in ru_targets:
|
169
|
+
if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
|
158
170
|
a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
|
171
|
+
elif act_qcs is not None: # pragma: no cover
|
172
|
+
raise ValueError('Activation configuration passed but no relevant metric requested.')
|
159
173
|
|
160
174
|
ru = ResourceUtilization()
|
161
175
|
if RUTarget.WEIGHTS in ru_targets:
|
@@ -163,9 +177,7 @@ class ResourceUtilizationCalculator:
|
|
163
177
|
if RUTarget.ACTIVATION in ru_targets:
|
164
178
|
ru.activation_memory = a_total
|
165
179
|
if RUTarget.TOTAL in ru_targets:
|
166
|
-
|
167
|
-
act_tensors_total, *_ = self.compute_activation_tensors_utilization(target_criterion, bitwidth_mode, act_qcs)
|
168
|
-
ru.total_memory = w_total + act_tensors_total
|
180
|
+
ru.total_memory = w_total + a_total
|
169
181
|
if RUTarget.BOPS in ru_targets:
|
170
182
|
ru.bops, _ = self.compute_bops(target_criterion=target_criterion,
|
171
183
|
bitwidth_mode=bitwidth_mode, act_qcs=act_qcs, w_qcs=w_qcs)
|
@@ -262,12 +274,12 @@ class ResourceUtilizationCalculator:
|
|
262
274
|
Returns:
|
263
275
|
Total activation utilization of the network.
|
264
276
|
"""
|
265
|
-
return self.
|
277
|
+
return self.compute_activation_utilization_by_cut(target_criterion, bitwidth_mode, act_qcs)[0]
|
266
278
|
|
267
|
-
def
|
268
|
-
|
269
|
-
|
270
|
-
|
279
|
+
def compute_activation_utilization_by_cut(self,
|
280
|
+
target_criterion: TargetInclusionCriterion,
|
281
|
+
bitwidth_mode: BitwidthMode,
|
282
|
+
act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
|
271
283
|
-> Tuple[float, Dict[Cut, Utilization], Dict[Cut, Dict[BaseNode, Utilization]]]:
|
272
284
|
"""
|
273
285
|
Compute graph activation cuts utilization.
|
@@ -292,20 +304,10 @@ class ResourceUtilizationCalculator:
|
|
292
304
|
if not graph_target_nodes:
|
293
305
|
return 0, {}, {}
|
294
306
|
|
295
|
-
if self._cuts is None:
|
296
|
-
memory_graph = MemoryGraph(deepcopy(self.graph))
|
297
|
-
_, _, cuts = compute_graph_max_cut(memory_graph)
|
298
|
-
if cuts is None: # pragma: no cover
|
299
|
-
raise RuntimeError("Failed to calculate activation memory cuts for graph.") # pragma: no cover
|
300
|
-
cuts = [cut for cut in cuts if cut.mem_elements.elements]
|
301
|
-
# cache cuts nodes for future use, so do not filter by target
|
302
|
-
self._cuts = {cut: [self.graph.find_node_by_name(m.node_name)[0] for m in cut.mem_elements.elements]
|
303
|
-
for cut in cuts}
|
304
|
-
|
305
307
|
util_per_cut: Dict[Cut, Utilization] = {} # type: ignore
|
306
308
|
util_per_cut_per_node = defaultdict(dict)
|
307
|
-
for cut in self._cuts:
|
308
|
-
cut_target_nodes =
|
309
|
+
for cut in self.cuts:
|
310
|
+
cut_target_nodes = self._get_cut_target_nodes(cut, target_criterion)
|
309
311
|
if not cut_target_nodes:
|
310
312
|
continue
|
311
313
|
for n in cut_target_nodes:
|
@@ -322,7 +324,7 @@ class ResourceUtilizationCalculator:
|
|
322
324
|
bitwidth_mode: BitwidthMode,
|
323
325
|
act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]] = None,
|
324
326
|
include_reused=False) \
|
325
|
-
-> Tuple[float, Dict[BaseNode, Utilization]]:
|
327
|
+
-> Tuple[float, Dict[BaseNode, Utilization]]: # pragma: no cover
|
326
328
|
"""
|
327
329
|
Compute resource utilization for graph's activations tensors.
|
328
330
|
|
@@ -462,7 +464,6 @@ class ResourceUtilizationCalculator:
|
|
462
464
|
node_bops = a_nbits * w_nbits * node_mac
|
463
465
|
return node_bops
|
464
466
|
|
465
|
-
@lru_cache
|
466
467
|
def _get_cut_target_nodes(self, cut: Cut, target_criterion: TargetInclusionCriterion) -> List[BaseNode]:
|
467
468
|
"""
|
468
469
|
Retrieve target nodes from a cut filtered by a criterion.
|
@@ -474,7 +475,7 @@ class ResourceUtilizationCalculator:
|
|
474
475
|
Returns:
|
475
476
|
A list of target nodes from a cut.
|
476
477
|
"""
|
477
|
-
cut_nodes = self._cuts[cut]
|
478
|
+
cut_nodes = self.cuts[cut]
|
478
479
|
return self._get_target_activation_nodes(target_criterion, include_reused=True, nodes=cut_nodes)
|
479
480
|
|
480
481
|
def _get_target_weight_nodes(self,
|
@@ -500,7 +501,7 @@ class ResourceUtilizationCalculator:
|
|
500
501
|
quantized = [n for n in self.graph if n.has_any_weight_attr_to_quantize()]
|
501
502
|
configurable = self.graph.get_weights_configurable_nodes(self.fw_info, include_reused_nodes=include_reused)
|
502
503
|
nodes = [n for n in quantized if n not in configurable]
|
503
|
-
elif target_criterion == TargetInclusionCriterion.Any:
|
504
|
+
elif target_criterion == TargetInclusionCriterion.Any: # pragma: no cover
|
504
505
|
nodes = list(self.graph.nodes)
|
505
506
|
else: # pragma: no cover
|
506
507
|
raise ValueError(f'Unknown {target_criterion}.')
|
@@ -566,15 +567,15 @@ class ResourceUtilizationCalculator:
|
|
566
567
|
Selected nodes.
|
567
568
|
"""
|
568
569
|
nodes = nodes or self.graph.nodes
|
569
|
-
if target_criterion == TargetInclusionCriterion.QConfigurable:
|
570
|
+
if target_criterion == TargetInclusionCriterion.QConfigurable: # pragma: no cover
|
570
571
|
nodes = [n for n in nodes if n.has_configurable_activation()]
|
571
572
|
elif target_criterion == TargetInclusionCriterion.AnyQuantized:
|
572
573
|
nodes = [n for n in nodes if n.is_activation_quantization_enabled()]
|
573
|
-
elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
|
574
|
+
elif target_criterion == TargetInclusionCriterion.QNonConfigurable: # pragma: no cover
|
574
575
|
nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
|
575
576
|
elif target_criterion != TargetInclusionCriterion.Any: # pragma: no cover
|
576
577
|
raise ValueError(f'Unknown {target_criterion}.')
|
577
|
-
if not include_reused:
|
578
|
+
if not include_reused: # pragma: no cover
|
578
579
|
nodes = [n for n in nodes if not n.reuse]
|
579
580
|
return nodes
|
580
581
|
|
@@ -664,4 +665,4 @@ class ResourceUtilizationCalculator:
|
|
664
665
|
f'as it {len(w_qcs)}!=1 unique candidates.')
|
665
666
|
return w_qcs[0].weights_n_bits
|
666
667
|
|
667
|
-
raise ValueError(f'Unknown mode {bitwidth_mode.name}')
|
668
|
+
raise ValueError(f'Unknown mode {bitwidth_mode.name}') # pragma: no cover
|
@@ -16,7 +16,7 @@
|
|
16
16
|
import numpy as np
|
17
17
|
from pulp import *
|
18
18
|
from tqdm import tqdm
|
19
|
-
from typing import Dict, Tuple
|
19
|
+
from typing import Dict, Tuple, Set, Any
|
20
20
|
|
21
21
|
from model_compression_toolkit.logger import Logger
|
22
22
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
|
@@ -167,95 +167,95 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
|
|
167
167
|
indicators_arr = np.array(indicators)
|
168
168
|
indicators_matrix = np.diag(indicators_arr)
|
169
169
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
target=target,
|
176
|
-
target_resource_utilization_value=ru_value,
|
177
|
-
indicators_matrix=indicators_matrix,
|
178
|
-
lp_problem=lp_problem,
|
179
|
-
non_conf_ru_vector=non_conf_ru_vector)
|
170
|
+
_add_ru_constraints(search_manager=search_manager,
|
171
|
+
target_resource_utilization=target_resource_utilization,
|
172
|
+
indicators_matrix=indicators_matrix,
|
173
|
+
lp_problem=lp_problem,
|
174
|
+
non_conf_ru_dict=search_manager.non_conf_ru_dict)
|
180
175
|
else: # pragma: no cover
|
181
176
|
Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
|
182
177
|
"A valid 'target_resource_utilization' is required.")
|
183
178
|
return lp_problem
|
184
179
|
|
185
180
|
|
186
|
-
def
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
non_conf_ru_vector: np.ndarray):
|
181
|
+
def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
|
182
|
+
target_resource_utilization: ResourceUtilization,
|
183
|
+
indicators_matrix: np.ndarray,
|
184
|
+
lp_problem: LpProblem,
|
185
|
+
non_conf_ru_dict: Optional[Dict[RUTarget, np.ndarray]]):
|
192
186
|
"""
|
193
|
-
Adding
|
187
|
+
Adding targets constraints for the Lp problem for the given target resource utilization.
|
194
188
|
The update to the Lp problem object is done inplace.
|
195
189
|
|
196
190
|
Args:
|
197
191
|
search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
|
198
|
-
|
199
|
-
target_resource_utilization_value: Target resource utilization value of the given target resource utilization
|
200
|
-
for which the constraint is added.
|
192
|
+
target_resource_utilization: Target resource utilization.
|
201
193
|
indicators_matrix: A diagonal matrix of the Lp problem's indicators.
|
202
194
|
lp_problem: An Lp problem object to add constraint to.
|
203
|
-
|
204
|
-
|
195
|
+
non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets.
|
205
196
|
"""
|
197
|
+
ru_indicated_vectors = {}
|
198
|
+
# targets to add constraints for
|
199
|
+
constraints_targets = target_resource_utilization.get_restricted_metrics()
|
200
|
+
# to add constraints for Total target we need to compute weight and activation
|
201
|
+
targets_to_compute = constraints_targets
|
202
|
+
if RUTarget.TOTAL in constraints_targets:
|
203
|
+
targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
|
204
|
+
|
205
|
+
for target in targets_to_compute:
|
206
|
+
ru_matrix = search_manager.compute_resource_utilization_matrix(target) # num elements X num configurations
|
207
|
+
indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix) # num elements X num configurations
|
208
|
+
|
209
|
+
# Sum the indicated values over all configurations, and add the value for minimal configuration once.
|
210
|
+
# Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory
|
211
|
+
# that would be required if that configuration is selected).
|
212
|
+
# Each element in a vector is an lp object representing the configurations sum term for a memory element.
|
213
|
+
ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
|
214
|
+
|
215
|
+
non_conf_ru_vec = non_conf_ru_dict[target]
|
216
|
+
if non_conf_ru_vec is not None and non_conf_ru_vec.size:
|
217
|
+
# add non-conf value as additional mem elements so that they get aggregated
|
218
|
+
ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
|
219
|
+
ru_indicated_vectors[target] = ru_vec
|
220
|
+
|
221
|
+
# add constraints only for the restricted targets in target resource utilization.
|
222
|
+
for target in constraints_targets:
|
223
|
+
target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target]
|
224
|
+
aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
|
225
|
+
for v in aggr_ru:
|
226
|
+
if isinstance(v, float):
|
227
|
+
if v > target_resource_utilization_value:
|
228
|
+
Logger.critical(
|
229
|
+
f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
|
230
|
+
f"with the value {target_resource_utilization_value}.") # pragma: no cover
|
231
|
+
else:
|
232
|
+
lp_problem += v <= target_resource_utilization_value
|
233
|
+
|
206
234
|
|
207
|
-
|
208
|
-
indicated_ru_matrix = np.matmul(ru_matrix, indicators_matrix)
|
209
|
-
# Need to re-organize the tensor such that the configurations' axis will be second,
|
210
|
-
# and all metric values' axis will come afterword
|
211
|
-
indicated_ru_matrix = np.moveaxis(indicated_ru_matrix, source=len(indicated_ru_matrix.shape) - 1, destination=1)
|
212
|
-
|
213
|
-
# In order to get the result resource utilization according to a chosen set of indicators, we sum each row in
|
214
|
-
# the result matrix. Each row represents the resource utilization values for a specific resource utilization metric,
|
215
|
-
# such that only elements corresponding to a configuration which implied by the set of indicators will have some
|
216
|
-
# positive value different than 0 (and will contribute to the total resource utilization).
|
217
|
-
ru_sum_vector = np.array([
|
218
|
-
np.sum(indicated_ru_matrix[i], axis=0) + # sum of metric values over all configurations in a row
|
219
|
-
search_manager.min_ru[target][i] for i in range(indicated_ru_matrix.shape[0])])
|
220
|
-
|
221
|
-
ru_vec = ru_sum_vector
|
222
|
-
if non_conf_ru_vector is not None and non_conf_ru_vector.size:
|
223
|
-
ru_vec = np.concatenate([ru_vec, non_conf_ru_vector])
|
224
|
-
|
225
|
-
aggr_ru = _aggregate_for_lp(ru_vec, target)
|
226
|
-
for v in aggr_ru:
|
227
|
-
if isinstance(v, float):
|
228
|
-
if v > target_resource_utilization_value:
|
229
|
-
Logger.critical(
|
230
|
-
f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
|
231
|
-
f"with the value {target_resource_utilization_value}.") # pragma: no cover
|
232
|
-
else:
|
233
|
-
lp_problem += v <= target_resource_utilization_value
|
234
|
-
|
235
|
-
|
236
|
-
def _aggregate_for_lp(ru_vec, target: RUTarget) -> list:
|
235
|
+
def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
|
237
236
|
"""
|
238
237
|
Aggregate resource utilization values for the LP.
|
239
238
|
|
240
239
|
Args:
|
241
|
-
|
240
|
+
targets_ru_vec: resource utilization vectors for all precomputed targets.
|
242
241
|
target: resource utilization target.
|
243
242
|
|
244
243
|
Returns:
|
245
244
|
Aggregated resource utilization.
|
246
245
|
"""
|
247
246
|
if target == RUTarget.TOTAL:
|
248
|
-
w = lpSum(
|
249
|
-
|
247
|
+
w = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
|
248
|
+
act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION]
|
249
|
+
return [w + v for v in act_ru_vec]
|
250
250
|
|
251
251
|
if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
|
252
|
-
return [lpSum(
|
252
|
+
return [lpSum(targets_ru_vec[target])]
|
253
253
|
|
254
254
|
if target == RUTarget.ACTIVATION:
|
255
255
|
# for max aggregation, each value constitutes a separate constraint
|
256
|
-
return list(
|
256
|
+
return list(targets_ru_vec[target])
|
257
257
|
|
258
|
-
raise ValueError(f'Unexpected target {target}.')
|
258
|
+
raise ValueError(f'Unexpected target {target}.') # pragma: no cover
|
259
259
|
|
260
260
|
|
261
261
|
def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
|
File without changes
|
{mct_nightly-2.2.0.20250114.84821.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|