mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +102 -104
- model_compression_toolkit/__init__.py +2 -2
- model_compression_toolkit/core/common/framework_info.py +1 -3
- model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
- model_compression_toolkit/core/common/graph/base_graph.py +20 -21
- model_compression_toolkit/core/common/graph/base_node.py +44 -17
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +187 -0
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +35 -162
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +668 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +74 -51
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/pruner.py +5 -3
- model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
- model_compression_toolkit/core/graph_prep_runner.py +12 -11
- model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
- model_compression_toolkit/core/runner.py +33 -60
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/metadata.py +11 -10
- model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
- model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
- model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
- model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
- model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -528
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
DELETED
@@ -1,528 +0,0 @@
|
|
1
|
-
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
# ==============================================================================
|
15
|
-
from enum import Enum
|
16
|
-
from functools import partial
|
17
|
-
from typing import List, Optional
|
18
|
-
from copy import deepcopy
|
19
|
-
|
20
|
-
import numpy as np
|
21
|
-
|
22
|
-
from model_compression_toolkit.core import FrameworkInfo
|
23
|
-
from model_compression_toolkit.core.common import Graph, BaseNode
|
24
|
-
from model_compression_toolkit.constants import BITS_TO_BYTES, FLOAT_BITWIDTH
|
25
|
-
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
26
|
-
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
|
27
|
-
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
|
28
|
-
VirtualSplitWeightsNode, VirtualSplitActivationNode
|
29
|
-
from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
|
30
|
-
from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, Cut
|
31
|
-
from model_compression_toolkit.logger import Logger
|
32
|
-
|
33
|
-
|
34
|
-
def weights_size_utilization(mp_cfg: List[int],
                             graph: Graph,
                             fw_info: FrameworkInfo,
                             fw_impl: FrameworkImplementation) -> np.ndarray:
    """
    Computes a resource utilization vector of kernel-weights memory sizes.

    If a non-empty configuration is given, the vector holds one entry per weights-configurable node (in the
    order returned by graph.get_sorted_weights_configurable_nodes), computed with the bit-width of the
    candidate that mp_cfg selects for that node.
    If an empty configuration is given, the vector holds one entry per node that has a kernel attribute,
    is not weights-configurable, is not reused and whose weights candidates are all equal for that attribute.

    Args:
        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node).
        graph: Graph object.
        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
        fw_impl: FrameworkImplementation object with specific framework methods implementation (not used in this method).

    Returns: A vector of nodes' weights memory sizes, as computed by _compute_node_weights_memory.
    Note that the vector is not necessarily of the same length as the given config.

    """
    # Query the graph once; both branches below need the weights-configurable nodes.
    weights_configurable_nodes = graph.get_sorted_weights_configurable_nodes(fw_info)
    weights_memory = []

    if len(mp_cfg) == 0:
        # Computing non-configurable nodes resource utilization
        # TODO: when enabling multiple attribute quantization by default (currently,
        #  only kernel quantization is enabled) we should include other attributes memory in the sum of all
        #  weights memory (when quantized to their default 8-bit, non-configurable).
        #  When implementing this, we should just go over all attributes in the node instead of counting only kernels.

        # A set gives constant-time membership checks while scanning every node of the graph.
        weights_mp_nodes = {n.name for n in weights_configurable_nodes}
        for n in graph.nodes:
            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
            if kernel_attr is None:
                continue
            if n.name in weights_mp_nodes or n.reuse or not n.is_all_weights_candidates_equal(kernel_attr):
                # Configurable, reused, or with differing candidates - not counted here.
                continue

            # All candidates are equal for the kernel, so the first one is representative.
            node_nbits = (n.candidates_quantization_cfg[0].weights_quantization_cfg
                          .get_attr_config(kernel_attr).weights_n_bits)
            weights_memory.append(_compute_node_weights_memory(n, node_nbits, fw_info))
    else:
        # The configuration is indexed by the position of a node among ALL configurable nodes,
        # which is not necessarily its position among the weights-configurable ones.
        mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
        for n in weights_configurable_nodes:
            # Only nodes with kernel op can be considered configurable
            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
            node_idx = mp_nodes.index(n.name)
            node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
            node_nbits = node_qc.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits
            weights_memory.append(_compute_node_weights_memory(n, node_nbits, fw_info))

    return np.array(weights_memory)
|
91
|
-
|
92
|
-
|
93
|
-
def calc_graph_cuts(graph: Graph) -> List[Cut]:
    """
    Calculate graph activation cuts.

    A MemoryGraph is built from a deep copy of the given graph and passed to compute_graph_max_cut.
    Only cuts that contain at least one memory element whose node has a candidate with activation
    quantization enabled are returned; empty cuts are therefore dropped as well.

    Args:
        graph: A graph object to calculate activation cuts on.

    Returns:
        A list of activation cuts.

    """
    memory_graph = MemoryGraph(deepcopy(graph))
    _, _, cuts = compute_graph_max_cut(memory_graph)

    if cuts is None:
        # NOTE(review): presumably Logger.critical raises; otherwise iterating over None below fails - confirm.
        Logger.critical("Failed to calculate activation memory cuts for graph.")  # pragma: no cover

    # Filter empty cuts and cuts that contain only nodes with activation quantization disabled.
    filtered_cuts = []
    for cut in cuts:
        # any() over a generator stops at the first matching element and is False for an empty cut,
        # so no separate emptiness check is needed.
        cut_has_act_quant_node = any(
            graph.find_node_by_name(e.node_name)[0].has_activation_quantization_enabled_candidate()
            for e in cut.mem_elements.elements)
        if cut_has_act_quant_node:
            filtered_cuts.append(cut)
    return filtered_cuts
|
117
|
-
|
118
|
-
|
119
|
-
def activation_maxcut_size_utilization(mp_cfg: List[int],
                                       graph: Graph,
                                       fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation,
                                       cuts: Optional[List[Cut]] = None) -> np.ndarray:
    """
    Computes a resource utilization vector with the respective output memory max-cut size for activation
    nodes, according to the given mixed-precision configuration.

    Each entry is the sum, over the memory elements of one cut, of the activation memory of the elements'
    nodes that have activation quantization enabled. Activation-configurable nodes use the bit-width of
    the candidate selected by mp_cfg; other nodes use the bit-width of their first candidate.

    Args:
        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node).
        graph: Graph object.
        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
            Used to retrieve the names of the configurable nodes from the graph.
        fw_impl: FrameworkImplementation object with specific framework methods implementation (not used in this method).
        cuts: a list of graph cuts (optional. if not provided calculated locally).
            TODO maxcut: refactor - need to remove the cuts so all metric functions signatures are the same.

    Returns: A vector of cuts' memory sizes (one entry per cut). An empty vector is returned for an empty config.
    Note that the vector is not necessarily of the same length as the given config.

    """
    if len(mp_cfg) == 0:
        # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
        # configurable nodes.
        return np.array([])

    mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)

    # Map each activation-configurable node to the activation bit-width selected for it by the configuration.
    nodes_act_nbits = {}
    for n in graph.get_sorted_activation_configurable_nodes():
        node_idx = mp_nodes.index(n.name)
        node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
        nodes_act_nbits[n.name] = node_qc.activation_quantization_cfg.activation_n_bits

    if cuts is None:
        cuts = calc_graph_cuts(graph)

    activation_cut_memory = []
    for cut in cuts:
        mem_elements = [m.node_name for m in cut.mem_elements.elements]
        mem = 0
        for op_name in mem_elements:
            n = graph.find_node_by_name(op_name)[0]
            if n.is_activation_quantization_enabled():
                # Nodes that are not activation-configurable fall back to their first candidate's bit-width.
                base_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
                mem += _compute_node_activation_memory(n, nodes_act_nbits.get(op_name, base_nbits))

        activation_cut_memory.append(mem)

    return np.array(activation_cut_memory)
|
171
|
-
|
172
|
-
|
173
|
-
# TODO maxcut: add test for this function and remove no cover
|
174
|
-
def activation_output_size_utilization(mp_cfg: List[int],
                                       graph: Graph,
                                       fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation) -> np.ndarray:  # pragma: no cover
    """
    Computes a resource utilization vector with the respective output memory size for each activation configurable node,
    according to the given mixed-precision configuration.
    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes,
    i.e. nodes that are not activation-configurable, have a candidate with activation quantization enabled
    and whose activation candidates are all equal.

    Args:
        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node).
        graph: Graph object.
        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
            Used to retrieve the names of the configurable nodes from the graph.
        fw_impl: FrameworkImplementation object with specific framework methods implementation (not used in this method).

    Returns: A vector of nodes' activation memory sizes, as computed by _compute_node_activation_memory.
    Note that the vector is not necessarily of the same length as the given config.

    """
    # Query the graph once; both branches below need the activation-configurable nodes.
    activation_configurable_nodes = graph.get_sorted_activation_configurable_nodes()
    activation_memory = []

    if len(mp_cfg) == 0:
        # Computing non-configurable nodes resource utilization.
        # A set gives constant-time membership checks while scanning every node of the graph.
        activation_mp_nodes = {n.name for n in activation_configurable_nodes}
        for n in graph.nodes:
            non_configurable_node = n.name not in activation_mp_nodes \
                                    and n.has_activation_quantization_enabled_candidate() \
                                    and n.is_all_activation_candidates_equal()

            if non_configurable_node:
                # All candidates are equal, so the first one is representative.
                node_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
                activation_memory.append(_compute_node_activation_memory(n, node_nbits))
    else:
        # The configuration is indexed by the position of a node among ALL configurable nodes.
        mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
        # Go over all nodes that should be taken into consideration when computing the activation memory utilization.
        for n in activation_configurable_nodes:
            node_idx = mp_nodes.index(n.name)
            node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
            node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
            activation_memory.append(_compute_node_activation_memory(n, node_nbits))

    return np.array(activation_memory)
|
221
|
-
|
222
|
-
|
223
|
-
def total_weights_activation_utilization(mp_cfg: List[int],
                                         graph: Graph,
                                         fw_info: FrameworkInfo,
                                         fw_impl: FrameworkImplementation) -> np.ndarray:
    """
    Computes resource utilization tensor with the respective weights size and output memory size for each configurable node,
    according to the given mixed-precision configuration.
    If an empty configuration is given, then computes resource utilization tensor for non-configurable nodes
    (nodes that are non-configurable in their weights, in their activation, or in both).

    Args:
        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node).
        graph: Graph object.
        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
        fw_impl: FrameworkImplementation object with specific framework methods implementation (not used in this method).

    Returns: An array with one [weights memory, activation memory] row per considered node
    (an empty 1D array if no node is considered).
    Note that the number of rows is not necessarily the same as the length of the given config.

    """
    weights_activation_memory = []

    if len(mp_cfg) == 0:
        # The configurable-node names are only needed here; sets give constant-time membership checks
        # while scanning every node of the graph.
        weights_mp_nodes = {n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)}
        activation_mp_nodes = {n.name for n in graph.get_sorted_activation_configurable_nodes()}

        # Computing non-configurable nodes utilization
        for n in graph.nodes:
            non_configurable = False
            node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0

            # Non-configurable Weights
            # TODO: currently considering only kernel attributes in weights memory utilization.
            #  When enabling multi-attribute quantization we need to modify this method to count all attributes.
            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
            if kernel_attr is not None:
                is_non_configurable_weights = n.name not in weights_mp_nodes and \
                                              n.is_all_weights_candidates_equal(kernel_attr) and \
                                              not n.reuse

                if is_non_configurable_weights:
                    node_nbits = (n.candidates_quantization_cfg[0].weights_quantization_cfg
                                  .get_attr_config(kernel_attr).weights_n_bits)
                    node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
                    non_configurable = True

            # Non-configurable Activation
            is_non_configurable_activation = n.name not in activation_mp_nodes and \
                                             n.has_activation_quantization_enabled_candidate() and \
                                             n.is_all_activation_candidates_equal()

            if is_non_configurable_activation:
                node_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
                node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
                non_configurable = True

            # A row is added if either part is non-configurable; the other part stays 0.
            if non_configurable:
                weights_activation_memory.append(
                    np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
    else:
        # Go over all nodes that should be taken into consideration when computing the weights or
        # activation memory utilization (all configurable nodes).
        for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)):
            # TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute
            #  quantization we need to modify this method to count all attributes.

            node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]

            # Compute node's weights memory (if no configurable weights to quantize then set to 0)
            node_weights_memory_in_bytes = 0
            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
            if kernel_attr is not None:
                if n.is_weights_quantization_enabled(kernel_attr) and not n.is_all_weights_candidates_equal(kernel_attr):
                    node_weights_nbits = node_qc.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits
                    node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_weights_nbits, fw_info)

            # Compute node's activation memory (if node's activation is not configurable then set to 0)
            node_activation_nbits = node_qc.activation_quantization_cfg.activation_n_bits
            node_activation_memory_in_bytes = 0
            if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal():
                node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_activation_nbits)

            weights_activation_memory.append(np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))

    return np.array(weights_activation_memory)
|
308
|
-
|
309
|
-
|
310
|
-
def bops_utilization(mp_cfg: List[int],
                     graph: Graph,
                     fw_info: FrameworkInfo,
                     fw_impl: FrameworkImplementation,
                     set_constraints: bool = True) -> np.ndarray:
    """
    Computes a vector of bit-operations (BOPS) counts according to the given mixed-precision configuration.

    When set_constraints is True, the graph is treated as a virtual graph with composed nodes: one entry is
    produced for every VirtualActivationWeightsNode, in topological order. When it is False, the computation
    is delegated to _bops_utilization, which works on an original graph.

    Args:
        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
        graph: Graph object.
        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
        fw_impl: FrameworkImplementation object with specific framework methods implementation.
        set_constraints: Whether the vector is computed for setting constraints (virtual graph). Pass False to
            compute the utilization of a given config that is not for LP formalization purposes.

    Returns: A vector of node's BOPS count.
    Note that the vector is not necessarily of the same length as the given config.

    """
    if not set_constraints:
        return _bops_utilization(mp_cfg, graph, fw_info, fw_impl)

    # Non-configurable nodes are handled through _get_node_cfg_idx (candidate index 0), therefore no
    # separate implementation is needed for an empty configuration (len(mp_cfg) = 0).
    composed_nodes = []
    for topo_node in graph.get_topo_sorted_nodes():
        if isinstance(topo_node, VirtualActivationWeightsNode):
            composed_nodes.append(topo_node)

    configurable_names = graph.get_configurable_sorted_nodes_names(fw_info)

    bops_per_node = []
    for composed in composed_nodes:
        candidate = _get_node_cfg_idx(composed, mp_cfg, configurable_names)
        bops_per_node.append(composed.get_bops_count(fw_impl, fw_info, candidate_idx=candidate))

    return np.array(bops_per_node)
|
347
|
-
|
348
|
-
|
349
|
-
def _bops_utilization(mp_cfg: List[int],
                      graph: Graph,
                      fw_info: FrameworkInfo,
                      fw_impl: FrameworkImplementation) -> np.ndarray:
    """
    Computes a vector of bit-operations (BOPS) counts on an original graph, according to the given
    mixed-precision configuration.

    A node contributes an entry only if it has a kernel weight to quantize, has no positional weights, and
    its single input node has no more than one outgoing edge. The entry is
    (weights bits) * (input activation bits) * (node MAC operations), where FLOAT_BITWIDTH is used for a
    disabled quantization.

    Args:
        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
        graph: Graph object.
        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
        fw_impl: FrameworkImplementation object with specific framework methods implementation.

    Returns: A vector of node's BOPS count.

    """
    configurable_names = graph.get_configurable_sorted_nodes_names(fw_info)

    bops = []
    for n in graph.get_topo_sorted_nodes():
        # Nodes without a kernel to quantize (or with positional weights) are not part of the BOPS count.
        if not n.has_kernel_weight_to_quantize(fw_info) or n.has_positional_weights:
            continue

        in_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
        if len(in_edges) != 1:
            Logger.critical(f"Unable to compute BOPS metric for node {n.name} due to multiple inputs.")  # pragma: no cover

        source = in_edges[0].source_node
        if len(graph.out_edges(source)) > 1:
            # An input activation node with multiple outgoing edges is not considered
            # in the BOPS utilization calculation.
            continue

        source_cfg = source.candidates_quantization_cfg[_get_node_cfg_idx(source, mp_cfg, configurable_names)]

        mac_count = fw_impl.get_node_mac_operations(n, fw_info)

        node_cfg = n.candidates_quantization_cfg[_get_node_cfg_idx(n, mp_cfg, configurable_names)]
        kernel_cfg = node_cfg.weights_quantization_cfg.get_attr_config(fw_info.get_kernel_op_attributes(n.type)[0])

        # A disabled quantizer means the tensor stays in float.
        if kernel_cfg.enable_weights_quantization:
            weights_nbits = kernel_cfg.weights_n_bits
        else:
            weights_nbits = FLOAT_BITWIDTH

        if source_cfg.activation_quantization_cfg.enable_activation_quantization:
            activation_nbits = source_cfg.activation_quantization_cfg.activation_n_bits
        else:
            activation_nbits = FLOAT_BITWIDTH

        bops.append(weights_nbits * activation_nbits * mac_count)

    return np.array(bops)
|
398
|
-
|
399
|
-
|
400
|
-
def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nodes_names: List[str]) -> int:
|
401
|
-
"""
|
402
|
-
Returns the index of a node's quantization configuration candidate according to the given
|
403
|
-
mixed-precision configuration. If the node is not configurable, then it must have a single configuration,
|
404
|
-
therefore, the index 0 is returned.
|
405
|
-
|
406
|
-
Args:
|
407
|
-
node: A node to get its candidate configuration index.
|
408
|
-
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
|
409
|
-
sorted_configurable_nodes_names: A list of configurable nodes names.
|
410
|
-
|
411
|
-
Returns: An index (integer) of a node's quantization configuration candidate.
|
412
|
-
"""
|
413
|
-
|
414
|
-
if node.name in sorted_configurable_nodes_names:
|
415
|
-
node_idx = sorted_configurable_nodes_names.index(node.name)
|
416
|
-
return mp_cfg[node_idx]
|
417
|
-
else:
|
418
|
-
assert len(node.candidates_quantization_cfg) > 0, \
|
419
|
-
"Any node should have at least one candidate configuration."
|
420
|
-
return 0
|
421
|
-
|
422
|
-
|
423
|
-
def _get_origin_weights_node(n: BaseNode) -> BaseNode:
    """
    Resolves a node to the weights node it stands for. This is used when a resource utilization
    computation runs on a virtual graph and the original node behind a virtual weights node is needed.

    Args:
        n: A possibly virtual node.

    Returns: The node's original weights node if it is a VirtualActivationWeightsNode, its origin node
    if it is a VirtualSplitWeightsNode, and the given node itself otherwise.

    """

    if isinstance(n, VirtualActivationWeightsNode):
        # A merged activation-weights node keeps a reference to its weights part.
        origin = n.original_weights_node
    elif isinstance(n, VirtualSplitWeightsNode):
        # A split weights node keeps a reference to the node it was split from.
        origin = n.origin_node
    else:
        # Any other node is returned unchanged.
        origin = n

    return origin
|
441
|
-
|
442
|
-
|
443
|
-
def _get_origin_activation_node(n: BaseNode) -> BaseNode:
    """
    Resolves a node to the activation node it stands for. This is used when a resource utilization
    computation runs on a virtual graph and the original node behind a virtual activation node is needed.

    Args:
        n: A possibly virtual node.

    Returns: The node's original activation node if it is a VirtualActivationWeightsNode, its origin node
    if it is a VirtualSplitActivationNode, and the given node itself otherwise.

    """

    if isinstance(n, VirtualActivationWeightsNode):
        # A merged activation-weights node keeps a reference to its activation part.
        origin = n.original_activation_node
    elif isinstance(n, VirtualSplitActivationNode):
        # A split activation node keeps a reference to the node it was split from.
        origin = n.origin_node
    else:
        # Any other node is returned unchanged.
        origin = n

    return origin
|
461
|
-
|
462
|
-
|
463
|
-
def _compute_node_weights_memory(n: BaseNode, node_nbits: int, fw_info: FrameworkInfo) -> float:
    """
    Computes the memory taken by the kernel weights of the given node at the given bit-width.

    Args:
        n: A (possibly virtual) node to compute its weights' memory.
        node_nbits: A bit-width in which the node's weights should be quantized.
        fw_info: FrameworkInfo object about the specific framework; it is queried for the kernel
            attributes of the node's type.

    Returns: The number of kernel weights parameters multiplied by the bit-width
    and divided by BITS_TO_BYTES.

    """

    # The weights are read from the original node, in case a virtual node was given.
    weights_holder = _get_origin_weights_node(n)
    kernel_attrs = fw_info.get_kernel_op_attributes(weights_holder.type)

    # Count the elements of every kernel attribute, skipping None entries.
    total_params = sum(weights_holder.get_weights_by_keys(kernel_attr).flatten().shape[0]
                       for kernel_attr in kernel_attrs
                       if kernel_attr is not None)

    return total_params * node_nbits / BITS_TO_BYTES
|
484
|
-
|
485
|
-
|
486
|
-
def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float:
    """
    Computes the memory taken by the activation tensor of the given node at the given bit-width.

    Args:
        n: A (possibly virtual) node to compute its activation tensor memory.
        node_nbits: A bit-width in which the node's activation should be quantized.

    Returns: The node's total number of output parameters multiplied by the bit-width
    and divided by BITS_TO_BYTES.

    """

    # The output size is read from the original node, in case a virtual node was given.
    activation_holder = _get_origin_activation_node(n)

    return activation_holder.get_total_output_params() * node_nbits / BITS_TO_BYTES
|
502
|
-
|
503
|
-
|
504
|
-
class MpRuMetric(Enum):
    """
    Defines resource utilization computation functions that can be used to compute the utilization of a
    given target for a given mp config. The enum members are callable: calling a member applies its
    function on the given arguments.

     WEIGHTS_SIZE - applies the weights_size_utilization function

     ACTIVATION_MAXCUT_SIZE - applies the activation_maxcut_size_utilization function.

     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function

     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function

     BOPS_COUNT - applies the bops_utilization function

    """

    # Each function is wrapped in partial so that the Enum machinery stores it as a member value
    # rather than treating it as a method of the class.
    WEIGHTS_SIZE = partial(weights_size_utilization)
    ACTIVATION_MAXCUT_SIZE = partial(activation_maxcut_size_utilization)
    ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
    TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
    BOPS_COUNT = partial(bops_utilization)

    def __call__(self, *args, **kwargs):
        """
        Applies the utilization function held by this member.

        Args:
            *args: Positional arguments forwarded to the underlying function.
            **kwargs: Keyword arguments forwarded to the underlying function.

        Returns: Whatever the underlying utilization function returns.
        """
        # Keyword arguments are forwarded as well, so callers may name the arguments they pass.
        return self.value(*args, **kwargs)
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
# ==============================================================================
|
15
|
-
|
16
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import AttributeFilter
|
17
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities, OperationsSetToLayers, Smaller, SmallerEq, NotEq, Eq, GreaterEq, Greater, LayerFilterParams, OperationsToLayers, get_current_tpc
|
18
|
-
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OperatorsSet, \
|
19
|
-
OperatorSetConcat, Signedness, AttributeQuantizationConfig, OpQuantizationConfig, QuantizationConfigOptions, Fusing
|
20
|
-
|
21
|
-
from mct_quantizers import QuantizationMethod
|
22
|
-
|
23
|
-
|
File without changes
|
{mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|