mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +102 -104
- model_compression_toolkit/__init__.py +2 -2
- model_compression_toolkit/core/common/framework_info.py +1 -3
- model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
- model_compression_toolkit/core/common/graph/base_graph.py +20 -21
- model_compression_toolkit/core/common/graph/base_node.py +44 -17
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +187 -0
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +35 -162
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +668 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +74 -51
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/pruner.py +5 -3
- model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
- model_compression_toolkit/core/graph_prep_runner.py +12 -11
- model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
- model_compression_toolkit/core/runner.py +33 -60
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/metadata.py +11 -10
- model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
- model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
- model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
- model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
- model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -528
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
@@ -13,27 +13,23 @@
 #  limitations under the License.
 # ==============================================================================
 import copy
-from collections import defaultdict
+from typing import Callable, Any
 
-import numpy as np
-from typing import Callable, Any, Dict, Tuple
-
-from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.constants import FLOAT_BITWIDTH, BITS_TO_BYTES
 from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
+    RUTarget
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
+    ResourceUtilizationCalculator, BitwidthMode, TargetInclusionCriterion
 from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
-from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import calc_graph_cuts
+from model_compression_toolkit.target_platform_capabilities import FrameworkQuantizationCapabilities
 
 
 def compute_resource_utilization_data(in_model: Any,
                                       representative_data_gen: Callable,
                                       core_config: CoreConfig,
-                                      tpc: TargetPlatformCapabilities,
+                                      fqc: FrameworkQuantizationCapabilities,
                                       fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation,
                                       transformed_graph: Graph = None,
@@ -47,7 +43,7 @@ def compute_resource_utilization_data(in_model: Any,
         in_model: Model to build graph from (the model that intended to be quantized).
         representative_data_gen: Dataset used for calibration.
         core_config: CoreConfig containing parameters of how the model should be quantized.
-        tpc: TargetPlatformCapabilities object that models the inference target platform and
+        fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
             the attached framework operator's information.
         fw_info: Information needed for quantization about the specific framework.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
@@ -70,183 +66,23 @@ def compute_resource_utilization_data(in_model: Any,
                                                      core_config.quantization_config,
                                                      fw_info,
                                                      fw_impl,
-                                                     tpc,
+                                                     fqc,
                                                      bit_width_config=core_config.bit_width_config,
-                                                     mixed_precision_enable=mixed_precision_enable)
-
-    # Compute parameters sum
-    weights_memory_bytes, weights_params = compute_nodes_weights_params(graph=transformed_graph, fw_info=fw_info)
-    total_weights_params = 0 if len(weights_params) == 0 else sum(weights_params)
-
-    # Compute max activation tensor
-    activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_maxcut_sizes(graph=transformed_graph)
-    max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
-
-    # Compute total memory utilization - parameters sum + max activation tensor
-    total_size = total_weights_params + max_activation_tensor_size
-
-    # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
-    bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
-    bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
-
-    return ResourceUtilization(weights_memory=total_weights_params,
-                               activation_memory=max_activation_tensor_size,
-                               total_memory=total_size,
-                               bops=bops_count)
-
-
-def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Calculates the memory usage in bytes and the number of weight parameters for each node within a graph.
-    Memory calculations are based on the maximum bit-width used for quantization per node.
-
-    Args:
-        graph: A finalized Graph object, representing the model structure.
-        fw_info: FrameworkInfo object containing details about the specific framework's
-            quantization attributes for different layers' weights.
-
-    Returns:
-        A tuple containing two arrays:
-            - The first array represents the memory in bytes for each node's weights when quantized at the maximal bit-width.
-            - The second array represents the total number of weight parameters for each node.
-    """
-    weights_params = []
-    weights_memory_bytes = []
-    for n in graph.nodes:
-        # TODO: when enabling multiple attribute quantization by default (currently,
-        #  only kernel quantization is enabled) we should include other attributes memory in the sum of all
-        #  weights memory.
-        #  When implementing this, we should just go over all attributes in the node instead of counting only kernels.
-        kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-        if kernel_attr is not None and not n.reuse:
-            kernel_candidates = n.get_all_weights_attr_candidates(kernel_attr)
-
-            if len(kernel_candidates) > 0 and any([c.enable_weights_quantization for c in kernel_candidates]):
-                max_weight_bits = max([kc.weights_n_bits for kc in kernel_candidates])
-                node_num_weights_params = 0
-                for attr in fw_info.get_kernel_op_attributes(n.type):
-                    if attr is not None:
-                        node_num_weights_params += n.get_weights_by_keys(attr).flatten().shape[0]
-
-                weights_params.append(node_num_weights_params)
-
-                # multiply num params by num bits and divide by BITS_TO_BYTES to convert from bits to bytes
-                weights_memory_bytes.append(node_num_weights_params * max_weight_bits / BITS_TO_BYTES)
-
-    return np.array(weights_memory_bytes), np.array(weights_params)
-
-
-def compute_activation_output_maxcut_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Computes an array of the respective output tensor maxcut size and an array of the output tensor
-    cut size in bytes for each cut.
-
-    Args:
-        graph: A finalized Graph object, representing the model structure.
-
-    Returns:
-        A tuple containing two arrays:
-            - The first is an array of the size of each activation max-cut size in bytes, calculated
-              using the maximal bit-width for quantization.
-            - The second array an array of the size of each activation max-cut activation size in number of parameters.
-
-    """
-    cuts = calc_graph_cuts(graph)
-
-    # map nodes to cuts.
-    node_to_cat_mapping = defaultdict(list)
-    for i, cut in enumerate(cuts):
-        mem_element_names = [m.node_name for m in cut.mem_elements.elements]
-        for m_name in mem_element_names:
-            if len(graph.find_node_by_name(m_name)) > 0:
-                node_to_cat_mapping[m_name].append(i)
-            else:
-                Logger.critical(f"Missing node: {m_name}")  # pragma: no cover
+                                                     mixed_precision_enable=mixed_precision_enable,
+                                                     running_gptq=False)
 
-    activation_outputs = np.zeros(len(cuts))
-    activation_outputs_bytes = np.zeros(len(cuts))
-    for n in graph.nodes:
-        # Go over all nodes that have configurable activation.
-        if n.has_activation_quantization_enabled_candidate():
-            # Fetch maximum bits required for activations quantization.
-            max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
-            node_output_size = n.get_total_output_params()
-            for cut_index in node_to_cat_mapping[n.name]:
-                activation_outputs[cut_index] += node_output_size
-                # Calculate activation size in bytes and append to list
-                activation_outputs_bytes[cut_index] += node_output_size * max_activation_bits / BITS_TO_BYTES
-
-    return activation_outputs_bytes, activation_outputs
-
-
-# TODO maxcut: add test for this function and remove no cover
-def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:  # pragma: no cover
-    """
-    Computes an array of the respective output tensor size and an array of the output tensor size in bytes for
-    each node.
-
-    Args:
-        graph: A finalized Graph object, representing the model structure.
-
-    Returns:
-        A tuple containing two arrays:
-            - The first array represents the size of each node's activation output tensor size in bytes,
-              calculated using the maximal bit-width for quantization.
-            - The second array represents the size of each node's activation output tensor size.
-
-    """
-    activation_outputs = []
-    activation_outputs_bytes = []
-    for n in graph.nodes:
-        # Go over all nodes that have configurable activation.
-        if n.has_activation_quantization_enabled_candidate():
-            # Fetch maximum bits required for quantizing activations
-            max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
-            node_output_size = n.get_total_output_params()
-            activation_outputs.append(node_output_size)
-            # Calculate activation size in bytes and append to list
-            activation_outputs_bytes.append(node_output_size * max_activation_bits / BITS_TO_BYTES)
-
-    return np.array(activation_outputs_bytes), np.array(activation_outputs)
-
-
-def compute_total_bops(graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation) -> np.ndarray:
-    """
-    Computes a vector with the respective Bit-operations count for each configurable node that includes MAC operations.
-    The computation assumes that the graph is a representation of a float model, thus, BOPs computation uses 32-bit.
-
-    Args:
-        graph: Finalized Graph object.
-        fw_info: FrameworkInfo object about the specific framework
-            (e.g., attributes of different layers' weights to quantize).
-        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-
-    Returns: A vector of nodes' Bit-operations count.
-
-    """
-
-    bops = []
-
-    # Go over all configurable nodes that have kernels.
-    for n in graph.get_topo_sorted_nodes():
-        if n.has_kernel_weight_to_quantize(fw_info):
-            # If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
-            incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
-            assert len(incoming_edges) == 1, f"Can't compute BOPS metric for node {n.name} with multiple inputs."
-
-            node_mac = fw_impl.get_node_mac_operations(n, fw_info)
-
-            node_bops = (FLOAT_BITWIDTH ** 2) * node_mac
-            bops.append(node_bops)
-
-    return np.array(bops)
+    ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
+    ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.Q8Bit,
+                                                    ru_targets=set(RUTarget) - {RUTarget.BOPS})
+    ru.bops, _ = ru_calculator.compute_bops(TargetInclusionCriterion.AnyQuantized, BitwidthMode.Float)
+    return ru
 
 
 def requires_mixed_precision(in_model: Any,
                              target_resource_utilization: ResourceUtilization,
                              representative_data_gen: Callable,
                              core_config: CoreConfig,
-                             tpc: TargetPlatformCapabilities,
+                             fqc: FrameworkQuantizationCapabilities,
                              fw_info: FrameworkInfo,
                              fw_impl: FrameworkImplementation) -> bool:
     """
@@ -261,14 +97,13 @@ def requires_mixed_precision(in_model: Any,
         target_resource_utilization: The resource utilization of the target device.
         representative_data_gen: A function that generates representative data for the model.
         core_config: CoreConfig containing parameters of how the model should be quantized.
-        tpc: TargetPlatformCapabilities object that models the inference target platform and
+        fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
             the attached framework operator's information.
         fw_info: Information needed for quantization about the specific framework.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns: A boolean indicating if mixed precision is needed.
     """
-    is_mixed_precision = False
     core_config = _create_core_config_for_ru(core_config)
 
     transformed_graph = graph_preparation_runner(in_model,
@@ -276,27 +111,15 @@ def requires_mixed_precision(in_model: Any,
                                                  core_config.quantization_config,
                                                  fw_info,
                                                  fw_impl,
-                                                 tpc,
+                                                 fqc,
                                                  bit_width_config=core_config.bit_width_config,
-                                                 mixed_precision_enable=False)
-
-    weights_memory_by_layer_bytes, _ = compute_nodes_weights_params(transformed_graph, fw_info)
-    total_weights_memory_bytes = 0 if len(weights_memory_by_layer_bytes) == 0 else sum(weights_memory_by_layer_bytes)
-
-    # Compute max activation tensor in bytes
-    activation_memory_estimation_bytes, _ = compute_activation_output_maxcut_sizes(transformed_graph)
-    max_activation_memory_estimation_bytes = 0 if len(activation_memory_estimation_bytes) == 0 \
-        else max(activation_memory_estimation_bytes)
-
-    # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
-    bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
-    bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
+                                                 mixed_precision_enable=False,
+                                                 running_gptq=False)
 
-
-
-
-
-    return is_mixed_precision
+    ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
+    max_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QMaxBit,
+                                                        ru_targets=target_resource_utilization.get_restricted_metrics())
+    return not target_resource_utilization.is_satisfied_by(max_ru)
 
 
 def _create_core_config_for_ru(core_config: CoreConfig) -> CoreConfig:
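
Both functions above now delegate to the new ResourceUtilizationCalculator instead of the removed per-node helpers. A minimal sketch of the new flow, assuming transformed_graph, fw_impl, fw_info and a target ResourceUtilization are already prepared (class, enum and method names are taken from the diff itself):

    from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
        RUTarget
    from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
        ResourceUtilizationCalculator, BitwidthMode, TargetInclusionCriterion

    calc = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)

    # Memory-related targets are reported for a fixed 8-bit configuration, while BOPS
    # is computed separately against the float baseline, as in compute_resource_utilization_data.
    ru = calc.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.Q8Bit,
                                           ru_targets=set(RUTarget) - {RUTarget.BOPS})
    ru.bops, _ = calc.compute_bops(TargetInclusionCriterion.AnyQuantized, BitwidthMode.Float)

    # requires_mixed_precision asks whether the target still fails when every quantizer
    # gets its maximal bit-width (BitwidthMode.QMaxBit); only then is a search worthwhile.
    max_ru = calc.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QMaxBit,
                                               ru_targets=target_resource_utilization.get_restricted_metrics())
    needs_mixed_precision = not target_resource_utilization.is_satisfied_by(max_ru)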
@@ -16,7 +16,7 @@
 import numpy as np
 from pulp import *
 from tqdm import tqdm
-from typing import Dict,
+from typing import Dict, Tuple, Set, Any
 
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
@@ -167,72 +167,95 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
         indicators_arr = np.array(indicators)
         indicators_matrix = np.diag(indicators_arr)
 
-
-
-
-
-
-                           target=target,
-                           target_resource_utilization_value=ru_value,
-                           indicators_matrix=indicators_matrix,
-                           lp_problem=lp_problem,
-                           non_conf_ru_vector=non_conf_ru_vector)
+        _add_ru_constraints(search_manager=search_manager,
+                            target_resource_utilization=target_resource_utilization,
+                            indicators_matrix=indicators_matrix,
+                            lp_problem=lp_problem,
+                            non_conf_ru_dict=search_manager.non_conf_ru_dict)
     else:  # pragma: no cover
         Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
                         "A valid 'target_resource_utilization' is required.")
     return lp_problem
 
 
-def
-
-
-
-
-                       non_conf_ru_vector: np.ndarray):
+def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
+                        target_resource_utilization: ResourceUtilization,
+                        indicators_matrix: np.ndarray,
+                        lp_problem: LpProblem,
+                        non_conf_ru_dict: Optional[Dict[RUTarget, np.ndarray]]):
     """
-    Adding
+    Adding targets constraints for the Lp problem for the given target resource utilization.
     The update to the Lp problem object is done inplace.
 
     Args:
         search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-
-        target_resource_utilization_value: Target resource utilization value of the given target resource utilization
-            for which the constraint is added.
+        target_resource_utilization: Target resource utilization.
         indicators_matrix: A diagonal matrix of the Lp problem's indicators.
         lp_problem: An Lp problem object to add constraint to.
-
+        non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets.
+    """
+    ru_indicated_vectors = {}
+    # targets to add constraints for
+    constraints_targets = target_resource_utilization.get_restricted_metrics()
+    # to add constraints for Total target we need to compute weight and activation
+    targets_to_compute = constraints_targets
+    if RUTarget.TOTAL in constraints_targets:
+        targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
+
+    for target in targets_to_compute:
+        ru_matrix = search_manager.compute_resource_utilization_matrix(target)  # num elements X num configurations
+        indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)  # num elements X num configurations
+
+        # Sum the indicated values over all configurations, and add the value for minimal configuration once.
+        # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory
+        # that would be required if that configuration is selected).
+        # Each element in a vector is an lp object representing the configurations sum term for a memory element.
+        ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
+
+        non_conf_ru_vec = non_conf_ru_dict[target]
+        if non_conf_ru_vec is not None and non_conf_ru_vec.size:
+            # add non-conf value as additional mem elements so that they get aggregated
+            ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
+        ru_indicated_vectors[target] = ru_vec
+
+    # add constraints only for the restricted targets in target resource utilization.
+    for target in constraints_targets:
+        target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target]
+        aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
+        for v in aggr_ru:
+            if isinstance(v, float):
+                if v > target_resource_utilization_value:
+                    Logger.critical(
+                        f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
+                        f"with the value {target_resource_utilization_value}.")  # pragma: no cover
+            else:
+                lp_problem += v <= target_resource_utilization_value
+
 
+def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
     """
+    Aggregate resource utilization values for the LP.
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    for v in aggr_ru:
-        if isinstance(v, float):
-            if v > target_resource_utilization_value:
-                Logger.critical(
-                    f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
-                    f"with the value {target_resource_utilization_value}.")  # pragma: no cover
-        else:
-            lp_problem += v <= target_resource_utilization_value
+    Args:
+        targets_ru_vec: resource utilization vectors for all precomputed targets.
+        target: resource utilization target.
+
+    Returns:
+        Aggregated resource utilization.
+    """
+    if target == RUTarget.TOTAL:
+        w = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
+        act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION]
+        return [w + v for v in act_ru_vec]
+
+    if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
+        return [lpSum(targets_ru_vec[target])]
+
+    if target == RUTarget.ACTIVATION:
+        # for max aggregation, each value constitutes a separate constraint
+        return list(targets_ru_vec[target])
+
+    raise ValueError(f'Unexpected target {target}.')  # pragma: no cover
 
 
 def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
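
_aggregate_for_lp encodes a different aggregation per target: WEIGHTS and BOPS collapse into a single summed constraint, ACTIVATION keeps one constraint per memory cut (a linear encoding of the max), and TOTAL adds the weights sum to every activation cut. A self-contained toy illustration of the same constraint pattern with pulp (the variable names and bounds are made up):

    from pulp import LpProblem, LpVariable, LpMinimize, lpSum

    prob = LpProblem("mp_sketch", LpMinimize)
    # One utilization term per memory element (in the real code these are
    # indicator-weighted LpAffineExpressions, not free variables).
    w = [LpVariable(f"w{i}", 0) for i in range(3)]   # per-element weights utilization
    a = [LpVariable(f"a{i}", 0) for i in range(2)]   # per-cut activation utilization

    # WEIGHTS / BOPS: one constraint over the aggregated sum.
    prob += lpSum(w) <= 100
    # ACTIVATION: max aggregation -> a separate constraint per cut.
    for v in a:
        prob += v <= 50
    # TOTAL: the weights sum is added to every activation cut.
    for v in a:
        prob += lpSum(w) + v <= 120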
@@ -113,11 +113,9 @@ class SensitivityEvaluation:
         # in the new built MP model.
         self.baseline_model, self.model_mp, self.conf_node2layers = self._build_models()
 
-        # Build images batches for inference comparison
-        self.images_batches = self._get_images_batches(quant_config.num_of_images)
-
-        # Casting images tensors to the framework tensor type.
-        self.images_batches = [self.fw_impl.to_tensor(img) for img in self.images_batches]
+        # Build images batches for inference comparison and cat to framework type
+        images_batches = self._get_images_batches(quant_config.num_of_images)
+        self.images_batches = [self.fw_impl.to_tensor(img) for img in images_batches]
 
         # Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
         self.baseline_tensors_list = self._init_baseline_tensors_list()
@@ -80,8 +80,8 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
             updated_ru.append(node_updated_ru)
 
         # filter out new configs that don't hold the resource utilization restrictions
-        node_filtered_ru = [(node_idx, ru) for node_idx, ru in zip(valid_candidates, updated_ru)
-                            target_resource_utilization.
+        node_filtered_ru = [(node_idx, ru) for node_idx, ru in zip(valid_candidates, updated_ru)
+                            if target_resource_utilization.is_satisfied_by(ru)]
 
         if len(node_filtered_ru) > 0:
             sorted_by_ru = sorted(node_filtered_ru, key=lambda node_ru: (node_ru[1].total_memory,
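
The refinement step is a filter-then-sort over candidate bit-width changes: keep a candidate only if its recomputed utilization still satisfies the target, then prefer the smallest footprint. A toy illustration of the idiom, using a hypothetical stand-in for ResourceUtilization with only the fields used here:

    from dataclasses import dataclass

    @dataclass
    class Util:                       # illustrative stand-in for ResourceUtilization
        total_memory: float
        activation_memory: float

    def satisfied(target: Util, ru: Util) -> bool:
        # stand-in for target_resource_utilization.is_satisfied_by(ru)
        return (ru.total_memory <= target.total_memory
                and ru.activation_memory <= target.activation_memory)

    target = Util(total_memory=100.0, activation_memory=60.0)
    candidates = [(0, Util(90.0, 55.0)), (1, Util(120.0, 50.0)), (2, Util(80.0, 70.0))]

    feasible = [(i, ru) for i, ru in candidates if satisfied(target, ru)]
    best = min(feasible, key=lambda t: t[1].total_memory)   # -> candidate 0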
@@ -24,7 +24,8 @@ from model_compression_toolkit.core.common.pruning.memory_calculator import Memo
 from model_compression_toolkit.core.common.pruning.pruning_framework_implementation import PruningFrameworkImplementation
 from model_compression_toolkit.core.common.pruning.mask.per_simd_group_mask import PerSIMDGroupMask
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.target_platform_capabilities.
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
+    FrameworkQuantizationCapabilities
 
 
 class GreedyMaskCalculator:
@@ -42,7 +43,7 @@ class GreedyMaskCalculator:
                  target_resource_utilization: ResourceUtilization,
                  graph: Graph,
                  fw_impl: PruningFrameworkImplementation,
-                 tpc: TargetPlatformCapabilities,
+                 fqc: FrameworkQuantizationCapabilities,
                  simd_groups_indices: Dict[BaseNode, List[List[int]]]):
         """
         Args:
@@ -52,7 +53,7 @@ class GreedyMaskCalculator:
             target_resource_utilization (ResourceUtilization): The target resource utilization to achieve.
             graph (Graph): The computational graph of the model.
             fw_impl (PruningFrameworkImplementation): Framework-specific implementation details.
-            tpc (TargetPlatformCapabilities): Platform-specific constraints and capabilities.
+            fqc (FrameworkQuantizationCapabilities): Platform-specific constraints and capabilities.
            simd_groups_indices (Dict[BaseNode, List[List[int]]]): Indices of SIMD groups in each node.
         """
         self.prunable_nodes = prunable_nodes
@@ -60,7 +61,7 @@ class GreedyMaskCalculator:
         self.target_resource_utilization = target_resource_utilization
         self.graph = graph
         self.fw_impl = fw_impl
-        self.tpc = tpc
+        self.fqc = fqc
 
         self.simd_groups_indices = simd_groups_indices
         self.simd_groups_scores = simd_groups_scores
@@ -90,7 +91,7 @@ class GreedyMaskCalculator:
         """
         # Iteratively unprune the graph while monitoring the memory footprint.
         current_memory = self.memory_calculator.get_pruned_graph_memory(masks=self.oc_pruning_mask.get_mask(),
-                                                                        include_padded_channels=self.tpc.is_simd_padding)
+                                                                        include_padded_channels=self.fqc.is_simd_padding)
         if current_memory > self.target_resource_utilization.weights_memory:
             Logger.critical(f"Insufficient memory for the target resource utilization: current memory {current_memory}, "
                             f"target memory {self.target_resource_utilization.weights_memory}.")
@@ -105,7 +106,7 @@ class GreedyMaskCalculator:
                                                   group_index=group_to_remain_idx,
                                                   mask_indicator=MaskIndicator.REMAINED)
             current_memory = self.memory_calculator.get_pruned_graph_memory(masks=self.oc_pruning_mask.get_mask(),
-                                                                            include_padded_channels=self.tpc.is_simd_padding)
+                                                                            include_padded_channels=self.fqc.is_simd_padding)
 
             # If the target memory is exceeded, revert the last addition.
             if current_memory > self.target_resource_utilization.weights_memory:
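
The surrounding loop is a classic greedy grow-and-revert scheme: restore SIMD groups one at a time while the pruned-graph memory stays under target_resource_utilization.weights_memory, and undo a restoration once it overshoots. A generic sketch of the pattern (hypothetical helpers, not MCT's API):

    def greedy_grow(groups, memory_of, budget):
        """Generic grow-and-revert: add best-scoring items while the budget holds.

        groups: iterable of objects with a numeric .score attribute (assumption).
        memory_of: callable returning the footprint of a chosen subset (assumption).
        budget: memory budget that must not be exceeded.
        """
        chosen = []
        for g in sorted(groups, key=lambda g: g.score, reverse=True):
            chosen.append(g)
            if memory_of(chosen) > budget:
                chosen.pop()   # revert the last addition, as the calculator does above
        return chosen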
@@ -23,7 +23,6 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
 from model_compression_toolkit.core.common.pruning.memory_calculator import MemoryCalculator
 from model_compression_toolkit.core.common.pruning.pruning_framework_implementation import PruningFrameworkImplementation
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 
 class MaskIndicator(Enum):
     """
@@ -23,7 +23,6 @@ from model_compression_toolkit.core.common.pruning.mask.per_channel_mask import
 from model_compression_toolkit.core.common.pruning.memory_calculator import MemoryCalculator
 from model_compression_toolkit.core.common.pruning.pruning_framework_implementation import PruningFrameworkImplementation
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 
 class PerSIMDGroupMask:
     def __init__(self,
@@ -29,7 +29,9 @@ from model_compression_toolkit.core.common.pruning.pruning_framework_implementat
 from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo, \
     unroll_simd_scores_to_per_channel_scores
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.target_platform_capabilities.
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import \
+    FrameworkQuantizationCapabilities
+
 
 class Pruner:
     """
@@ -43,7 +45,7 @@ class Pruner:
                  target_resource_utilization: ResourceUtilization,
                  representative_data_gen: Callable,
                  pruning_config: PruningConfig,
-                 target_platform_capabilities: TargetPlatformCapabilities):
+                 target_platform_capabilities: FrameworkQuantizationCapabilities):
         """
         Args:
             float_graph (Graph): The floating-point representation of the model's computation graph.
@@ -52,7 +54,7 @@ class Pruner:
            target_resource_utilization (ResourceUtilization): The target resource utilization to be achieved after pruning.
            representative_data_gen (Callable): Generator function for representative dataset used in pruning analysis.
            pruning_config (PruningConfig): Configuration object specifying how pruning should be performed.
-           target_platform_capabilities (TargetPlatformCapabilities): Object encapsulating the capabilities of the target hardware platform.
+           target_platform_capabilities (FrameworkQuantizationCapabilities): Object encapsulating the capabilities of the target hardware platform.
         """
        self.float_graph = float_graph
        self.fw_info = fw_info
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from dataclasses import dataclass, field
 from typing import List, Union, Dict
 
 from model_compression_toolkit.core.common import Graph
@@ -19,6 +20,7 @@ from model_compression_toolkit.core.common.matchers.node_matcher import BaseNode
 from model_compression_toolkit.logger import Logger
 
 
+@dataclass
 class ManualBitWidthSelection:
     """
     Class to encapsulate the manual bit width selection configuration for a specific filter.
@@ -27,13 +29,11 @@ class ManualBitWidthSelection:
         filter (BaseNodeMatcher): The filter used to select nodes for bit width manipulation.
         bit_width (int): The bit width to be applied to the selected nodes.
     """
-    def __init__(self,
-                 filter: BaseNodeMatcher,
-                 bit_width: int):
-        self.filter = filter
-        self.bit_width = bit_width
+    filter: BaseNodeMatcher
+    bit_width: int
 
 
+@dataclass
 class BitWidthConfig:
     """
     Class to manage manual bit-width configurations.
@@ -41,13 +41,7 @@ class BitWidthConfig:
     Attributes:
         manual_activation_bit_width_selection_list (List[ManualBitWidthSelection]): A list of ManualBitWidthSelection objects defining manual bit-width configurations.
     """
-    def __init__(self,
-                 manual_activation_bit_width_selection_list: List[ManualBitWidthSelection] = None):
-        self.manual_activation_bit_width_selection_list = [] if manual_activation_bit_width_selection_list is None else manual_activation_bit_width_selection_list
-
-    def __repr__(self):
-        # Used for debugging, thus no cover.
-        return str(self.__dict__)  # pragma: no cover
+    manual_activation_bit_width_selection_list: List[ManualBitWidthSelection] = field(default_factory=list)
 
     def set_manual_activation_bit_width(self,
                                         filters: Union[List[BaseNodeMatcher], BaseNodeMatcher],
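
The refactor swaps hand-written __init__/__repr__ boilerplate for dataclasses; field(default_factory=list) is needed because a bare mutable default would be shared across instances (dataclasses rejects `= []` outright), which is exactly what the old `= None` plus runtime check worked around. A standalone sketch of the same pattern, with illustrative names:

    from dataclasses import dataclass, field
    from typing import List

    @dataclass
    class Selection:            # illustrative stand-in for ManualBitWidthSelection
        filter: str
        bit_width: int

    @dataclass
    class Config:               # illustrative stand-in for BitWidthConfig
        # default_factory gives every instance its own fresh list; __repr__ and
        # __eq__ are generated automatically.
        selections: List[Selection] = field(default_factory=list)

    a, b = Config(), Config()
    a.selections.append(Selection("conv", 4))
    assert b.selections == []   # b keeps its own, independent list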
@@ -15,8 +15,7 @@
 import copy
 from typing import List
 
-from
-
+from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
@@ -401,9 +401,9 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         # therefore, we need to look for the attribute in the op_cfg that is contained in the node attribute's name.
         attrs_included_in_name = {k: v for k, v in op_cfg.attr_weights_configs_mapping.items() if k in attr}
         if len(attrs_included_in_name) > 1:  # pragma: no cover
-            Logger.critical(f"Found multiple attribute in TPC OpConfig that are contained "
+            Logger.critical(f"Found multiple attribute in FQC OpConfig that are contained "
                             f"in the attribute name '{attr}'."
-                            f"Please fix the TPC attribute names mapping such that each operator's attribute would "
+                            f"Please fix the FQC attribute names mapping such that each operator's attribute would "
                             f"have a unique matching name.")
         if len(attrs_included_in_name) == 0:
             attr_cfg = op_cfg.default_weight_attr_config
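
The lookup above resolves a node attribute to its OpConfig entry by substring containment, which is why multiple matches must be rejected as ambiguous. A small illustrative sketch of the idiom (the mapping and attribute name are hypothetical):

    # 'kernel' matches a framework-specific attribute name such as
    # 'conv2d/kernel:0' by substring containment.
    attr_weights_configs_mapping = {"kernel": "kernel_cfg", "bias": "bias_cfg"}

    attr = "conv2d/kernel:0"
    matches = {k: v for k, v in attr_weights_configs_mapping.items() if k in attr}

    if len(matches) > 1:
        raise ValueError(f"Ambiguous attribute mapping for '{attr}': {sorted(matches)}")
    cfg = next(iter(matches.values())) if matches else "default_weight_attr_config"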