mct-nightly 2.2.0.20241222.533__py3-none-any.whl → 2.2.0.20241223.525__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/RECORD +26 -28
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/base_graph.py +1 -1
- model_compression_toolkit/core/common/graph/base_node.py +3 -3
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +4 -4
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +1 -0
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +4 -5
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +63 -170
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +1 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +7 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +50 -51
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +54 -52
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +57 -53
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +52 -51
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +53 -51
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +59 -57
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +54 -52
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +90 -83
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +26 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +57 -55
- model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +0 -67
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +0 -30
- {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/top_level.txt +0 -0
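Taken together, the per-file diffs below apply one consistent migration to the target platform capabilities (TPC) schema: QuantizationConfigOptions now receives a tuple of configurations instead of a list, and TargetPlatformModel receives its operator sets and fusing patterns as constructor arguments (operator_set, fusing_patterns) instead of collecting them inside a `with` block. A minimal, self-contained sketch of the before/after shape; the classes here are stand-in dataclasses that only mirror the schema API visible in the diffs, not the real MCT classes:

from dataclasses import dataclass
from typing import Optional, Tuple

# Stand-ins mirroring the shape of the schema API seen in the diffs below.
@dataclass(frozen=True)
class QuantizationConfigOptions:
    quantization_config_list: Tuple[object, ...]      # was a mutable list before this release
    base_config: Optional[object] = None

@dataclass(frozen=True)
class OperatorsSet:
    name: str
    qc_options: Optional[QuantizationConfigOptions] = None

@dataclass(frozen=True)
class Fusing:
    operator_groups: Tuple[OperatorsSet, ...]         # patterns are tuples now, not lists

@dataclass(frozen=True)
class TargetPlatformModel:
    default_qco: QuantizationConfigOptions
    operator_set: Tuple[OperatorsSet, ...] = ()       # new: passed at construction time
    fusing_patterns: Tuple[Fusing, ...] = ()          # new: passed at construction time

default_config = object()  # placeholder for an OpQuantizationConfig built elsewhere
default_qco = QuantizationConfigOptions(tuple([default_config]))
conv = OperatorsSet("Conv")
any_relu = OperatorsSet("AnyReLU")
# Old style (removed below): 'with tpm: OperatorsSet(...); Fusing([...])' registered
# components on a globally tracked model. New style: build lists, pass tuples.
tpm = TargetPlatformModel(default_qco,
                          operator_set=(conv, any_relu),
                          fusing_patterns=(Fusing((conv, any_relu)),))
print(len(tpm.operator_set))  # 2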
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py
CHANGED
@@ -19,7 +19,8 @@ import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
+    Signedness, \
     AttributeQuantizationConfig, OpQuantizationConfig
 
 tp = mct.target_platform
@@ -152,7 +153,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = schema.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
 
     # Create a QuantizationConfigOptions for quantizing constants in functional ops.
     # Constant configuration is similar to the default eight bit configuration except for PoT
@@ -163,7 +164,55 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
             enable_weights_quantization=True, weights_per_channel_threshold=True,
             weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
-    const_configuration_options = schema.QuantizationConfigOptions([const_config])
+    const_configuration_options = schema.QuantizationConfigOptions(tuple([const_config]))
+
+    # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(tuple(mixed_precision_cfg_list),
+                                                                             base_config=base_config)
+
+    # Create an OperatorsSet to represent a set of operations.
+    # Each OperatorsSet has a unique label.
+    # If a quantization configuration options is passed, these options will
+    # be used for operations that will be attached to this set's label.
+    # Otherwise, it will be a configure-less set (used in fusing):
+    operator_set = []
+    fusing_patterns = []
+    # May suit for operations like: Dropout, Reshape, etc.
+    operator_set.append(schema.OperatorsSet("NoQuantization",
+                                            default_configuration_options.clone_and_edit(
+                                                enable_activation_quantization=False)
+                                            .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
+
+    # Define operator sets that use mixed_precision_configuration_options:
+    conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
+    fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+
+    # Define operations sets without quantization configuration
+    # options (useful for creating fusing patterns, for example):
+    any_relu = schema.OperatorsSet("AnyReLU")
+    add = schema.OperatorsSet("Add", const_configuration_options)
+    sub = schema.OperatorsSet("Sub", const_configuration_options)
+    mul = schema.OperatorsSet("Mul", const_configuration_options)
+    div = schema.OperatorsSet("Div", const_configuration_options)
+    prelu = schema.OperatorsSet("PReLU")
+    swish = schema.OperatorsSet("Swish")
+    sigmoid = schema.OperatorsSet("Sigmoid")
+    tanh = schema.OperatorsSet("Tanh")
+
+    operator_set.extend([conv, fc, any_relu, add, sub, mul, div, prelu, swish, sigmoid, tanh])
+    # Combine multiple operators into a single operator to avoid quantization between
+    # them. To do this we define fusing patterns using the OperatorsSets that were created.
+    # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
+    activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid, tanh])
+    activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid])
+    any_binary = schema.OperatorSetConcat([add, sub, mul, div])
+
+    # ------------------- #
+    # Fusions
+    # ------------------- #
+    fusing_patterns.append(schema.Fusing((conv, activations_after_conv_to_fuse)))
+    fusing_patterns.append(schema.Fusing((fc, activations_after_fc_to_fuse)))
+    fusing_patterns.append(schema.Fusing((any_binary, any_relu)))
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
@@ -173,56 +222,9 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         tpc_minor_version=3,
         tpc_patch_version=0,
         tpc_platform_type=IMX500_TP_MODEL,
+        operator_set=tuple(operator_set),
+        fusing_patterns=tuple(fusing_patterns),
         add_metadata=True,
         name=name)
 
-    # To start defining the model's components (such as operator sets, and fusing patterns),
-    # use 'with' the TargetPlatformModel instance, and create them as below:
-    with generated_tpm:
-        # Create an OperatorsSet to represent a set of operations.
-        # Each OperatorsSet has a unique label.
-        # If a quantization configuration options is passed, these options will
-        # be used for operations that will be attached to this set's label.
-        # Otherwise, it will be a configure-less set (used in fusing):
-
-        # May suit for operations like: Dropout, Reshape, etc.
-        default_qco = tp.get_default_quantization_config_options()
-        schema.OperatorsSet("NoQuantization",
-                            default_qco.clone_and_edit(enable_activation_quantization=False)
-                            .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-
-        # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-        mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
-                                                                                 base_config=base_config)
-
-        # Define operator sets that use mixed_precision_configuration_options:
-        conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
-        fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
-
-        # Define operations sets without quantization configuration
-        # options (useful for creating fusing patterns, for example):
-        any_relu = schema.OperatorsSet("AnyReLU")
-        add = schema.OperatorsSet("Add", const_configuration_options)
-        sub = schema.OperatorsSet("Sub", const_configuration_options)
-        mul = schema.OperatorsSet("Mul", const_configuration_options)
-        div = schema.OperatorsSet("Div", const_configuration_options)
-        prelu = schema.OperatorsSet("PReLU")
-        swish = schema.OperatorsSet("Swish")
-        sigmoid = schema.OperatorsSet("Sigmoid")
-        tanh = schema.OperatorsSet("Tanh")
-
-        # Combine multiple operators into a single operator to avoid quantization between
-        # them. To do this we define fusing patterns using the OperatorsSets that were created.
-        # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-        activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid, tanh])
-        activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid])
-        any_binary = schema.OperatorSetConcat([add, sub, mul, div])
-
-        # ------------------- #
-        # Fusions
-        # ------------------- #
-        schema.Fusing([conv, activations_after_conv_to_fuse])
-        schema.Fusing([fc, activations_after_fc_to_fuse])
-        schema.Fusing([any_binary, any_relu])
-
     return generated_tpm
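In the rewritten generate_tp_model above, OperatorSetConcat groups several operator sets so that one Fusing pattern can match any of them, e.g. Conv followed by any of AnyReLU/Swish/PReLU/Sigmoid/Tanh. As an illustration of what such a pattern expresses, here is a hypothetical matcher (not MCT's implementation): a chain of ops satisfies a fusing pattern when each op's label falls into the corresponding element's set of labels:

from typing import Sequence, Set, Tuple

# Hypothetical helper: each pattern element is a set of op labels; a chain of
# ops fuses when every op's label is in the corresponding element.
def matches_fusing_pattern(op_labels: Sequence[str],
                           pattern: Tuple[Set[str], ...]) -> bool:
    if len(op_labels) != len(pattern):
        return False
    return all(label in allowed for label, allowed in zip(op_labels, pattern))

conv = {"Conv"}
# OperatorSetConcat([any_relu, swish, prelu, sigmoid, tanh]) behaves, for
# matching purposes, like the union of the concatenated sets.
activations_after_conv = {"AnyReLU", "Swish", "PReLU", "Sigmoid", "Tanh"}

print(matches_fusing_pattern(["Conv", "Swish"], (conv, activations_after_conv)))  # True
print(matches_fusing_pattern(["Conv", "Add"], (conv, activations_after_conv)))    # False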
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py
CHANGED
@@ -19,7 +19,8 @@ import model_compression_toolkit.target_platform_capabilities.schema.v1 as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
     IMX500_TP_MODEL
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
+    Signedness, \
     AttributeQuantizationConfig, OpQuantizationConfig
 
 tp = mct.target_platform
@@ -87,7 +88,8 @@ def get_op_quantization_configs() -> \
         weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
-        enable_weights_quantization=False,
+        enable_weights_quantization=False,
+        # TODO: this will changed to True once implementing multi-attributes quantization
         lut_values_bitwidth=None)
 
     # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
@@ -176,13 +178,13 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
    # this configuration will be used for the operation quantization:
-    default_configuration_options = schema.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
     default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16))
-    default_config_options_16bit = schema.QuantizationConfigOptions([default_config_input16,
-                                                                     default_config_input16.clone_and_edit(
-                                                                         activation_n_bits=16,
-                                                                         signedness=Signedness.SIGNED)],
-                                                                    base_config=default_config_input16)
+    default_config_options_16bit = schema.QuantizationConfigOptions(tuple([default_config_input16,
+                                                                           default_config_input16.clone_and_edit(
+                                                                               activation_n_bits=16,
+                                                                               signedness=Signedness.SIGNED)]),
+                                                                    base_config=default_config_input16)
 
     # Create a QuantizationConfigOptions for quantizing constants in functional ops.
     # Constant configuration is similar to the default eight bit configuration except for PoT
@@ -193,7 +195,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
             enable_weights_quantization=True, weights_per_channel_threshold=True,
             weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
-    const_configuration_options = schema.QuantizationConfigOptions([const_config])
+    const_configuration_options = schema.QuantizationConfigOptions(tuple([const_config]))
 
     # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that
     # support 16 bit as input and output.
@@ -201,9 +203,9 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         supported_input_activation_n_bits=(8, 16))
     const_config_input16_output16 = const_config_input16.clone_and_edit(
         activation_n_bits=16, signedness=Signedness.SIGNED)
-    const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16,
-                                                                            const_config_input16],
-                                                                           base_config=const_config_input16)
+    const_configuration_options_inout16 = schema.QuantizationConfigOptions(tuple([const_config_input16_output16,
+                                                                                  const_config_input16]),
+                                                                           base_config=const_config_input16)
 
     const_config_input16_per_tensor = const_config.clone_and_edit(
         supported_input_activation_n_bits=(8, 16),
@@ -213,20 +215,91 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     )
     const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit(
         activation_n_bits=16, signedness=Signedness.SIGNED)
-    const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(
+    const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(tuple(
         [const_config_input16_output16_per_tensor,
-         const_config_input16_per_tensor],
+         const_config_input16_per_tensor]),
         base_config=const_config_input16_per_tensor)
 
     qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False,
                                                            quantization_preserving=True,
                                                            default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit(
                                                                weights_per_channel_threshold=False))
-    qpreserving_const_config_options = schema.QuantizationConfigOptions([qpreserving_const_config])
+    qpreserving_const_config_options = schema.QuantizationConfigOptions(tuple([qpreserving_const_config]))
 
     mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)
                          for mp_cfg in mixed_precision_cfg_list]
 
+    # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
+    mixed_precision_configuration_options = schema.QuantizationConfigOptions(tuple(
+        mixed_precision_cfg_list + mp_cfg_list_16bit),
+        base_config=base_config)
+
+    # Create an OperatorsSet to represent a set of operations.
+    # Each OperatorsSet has a unique label.
+    # If a quantization configuration options is passed, these options will
+    # be used for operations that will be attached to this set's label.
+    # Otherwise, it will be a configure-less set (used in fusing):
+    operator_set = []
+    fusing_patterns = []
+    # May suit for operations like: Dropout, Reshape, etc.
+    operator_set.append(schema.OperatorsSet(OPSET_NO_QUANTIZATION,
+                                            default_configuration_options.clone_and_edit(
+                                                enable_activation_quantization=False)
+                                            .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
+    operator_set.append(schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
+                                            default_configuration_options.clone_and_edit(
+                                                enable_activation_quantization=False,
+                                                quantization_preserving=True)
+                                            .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
+    operator_set.append(
+        schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options))
+    operator_set.append(schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
+                                            default_configuration_options.clone_and_edit(
+                                                enable_activation_quantization=False,
+                                                quantization_preserving=True,
+                                                supported_input_activation_n_bits=(8, 16))
+                                            .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
+    operator_set.append(schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor))
+
+    # Define operator sets that use mixed_precision_configuration_options:
+    conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
+    fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
+
+    operator_set.append(schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit))
+
+    # Note: Operations sets without quantization configuration are useful for creating fusing patterns
+    any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
+    add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
+    sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
+    mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
+    div = schema.OperatorsSet(OPSET_DIV, const_configuration_options)
+    min_max = schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
+    prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
+    swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
+    sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
+    tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit)
+    gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit)
+    hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
+    hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
+
+    operator_set.extend(
+        [conv, fc, any_relu, add, sub, mul, div, prelu, swish, sigmoid, tanh, min_max, gelu, hardsigmoid, hardswish])
+    # Combine multiple operators into a single operator to avoid quantization between
+    # them. To do this we define fusing patterns using the OperatorsSets that were created.
+    # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
+    activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid,
+                                                               tanh, gelu, hardswish, hardsigmoid])
+    activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid, tanh, gelu,
+                                                             hardswish, hardsigmoid])
+    any_binary = schema.OperatorSetConcat([add, sub, mul, div])
+
+    # ------------------- #
+    # Fusions
+    # ------------------- #
+    fusing_patterns.append(schema.Fusing((conv, activations_after_conv_to_fuse)))
+    fusing_patterns.append(schema.Fusing((fc, activations_after_fc_to_fuse)))
+    fusing_patterns.append(schema.Fusing((any_binary, any_relu)))
+
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
@@ -235,76 +308,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         tpc_minor_version=4,
         tpc_patch_version=0,
         tpc_platform_type=IMX500_TP_MODEL,
+        operator_set=tuple(operator_set),
+        fusing_patterns=tuple(fusing_patterns),
         add_metadata=True,
         name=name,
         is_simd_padding=True)
 
-    # To start defining the model's components (such as operator sets, and fusing patterns),
-    # use 'with' the TargetPlatformModel instance, and create them as below:
-    with generated_tpm:
-        # Create an OperatorsSet to represent a set of operations.
-        # Each OperatorsSet has a unique label.
-        # If a quantization configuration options is passed, these options will
-        # be used for operations that will be attached to this set's label.
-        # Otherwise, it will be a configure-less set (used in fusing):
-
-        # May suit for operations like: Dropout, Reshape, etc.
-        default_qco = tp.get_default_quantization_config_options()
-        schema.OperatorsSet(OPSET_NO_QUANTIZATION,
-                            default_qco.clone_and_edit(enable_activation_quantization=False)
-                            .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-        schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
-                            default_qco.clone_and_edit(enable_activation_quantization=False,
-                                                       quantization_preserving=True)
-                            .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-        schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options)
-        schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
-                            default_qco.clone_and_edit(enable_activation_quantization=False,
-                                                       quantization_preserving=True,
-                                                       supported_input_activation_n_bits=(8, 16))
-                            .clone_and_edit_weight_attribute(enable_weights_quantization=False))
-        schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor)
-
-        # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
-        mixed_precision_configuration_options = schema.QuantizationConfigOptions(
-            mixed_precision_cfg_list + mp_cfg_list_16bit,
-            base_config=base_config)
-
-        # Define operator sets that use mixed_precision_configuration_options:
-        conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
-        fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
-
-        schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit)
-
-        # Note: Operations sets without quantization configuration are useful for creating fusing patterns
-        any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
-        add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
-        sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
-        mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
-        div = schema.OperatorsSet(OPSET_DIV, const_configuration_options)
-        schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
-        prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
-        swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
-        sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
-        tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit)
-        gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit)
-        hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
-        hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
-
-        # Combine multiple operators into a single operator to avoid quantization between
-        # them. To do this we define fusing patterns using the OperatorsSets that were created.
-        # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
-        activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid,
-                                                                   tanh, gelu, hardswish, hardsigmoid])
-        activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid, tanh, gelu,
-                                                                 hardswish, hardsigmoid])
-        any_binary = schema.OperatorSetConcat([add, sub, mul, div])
-
-        # ------------------- #
-        # Fusions
-        # ------------------- #
-        schema.Fusing([conv, activations_after_conv_to_fuse])
-        schema.Fusing([fc, activations_after_fc_to_fuse])
-        schema.Fusing([any_binary, any_relu])
-
     return generated_tpm
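The v4 model above doubles its mixed-precision candidate list by cloning every 8-bit configuration into a 16-bit signed variant (activation_n_bits=16, signedness=Signedness.SIGNED) and handing the combined tuple to QuantizationConfigOptions with the 8-bit base_config. A self-contained sketch of that clone-and-extend step, using a stand-in config type whose clone_and_edit is modeled with dataclasses.replace (the real OpQuantizationConfig has many more fields):

from dataclasses import dataclass, replace
from enum import Enum

class Signedness(Enum):
    AUTO = 0
    SIGNED = 1
    UNSIGNED = 2

# Stand-in for OpQuantizationConfig; NOT the real MCT class.
@dataclass(frozen=True)
class OpQuantizationConfig:
    activation_n_bits: int = 8
    signedness: Signedness = Signedness.AUTO

    def clone_and_edit(self, **kwargs):
        return replace(self, **kwargs)

mixed_precision_cfg_list = [OpQuantizationConfig(activation_n_bits=n) for n in (8, 4, 2)]
# Mirrors: mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED) ...]
mp_cfg_list_16bit = [cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)
                     for cfg in mixed_precision_cfg_list]
all_cfgs = tuple(mixed_precision_cfg_list + mp_cfg_list_16bit)  # a tuple, per the new schema
print([c.activation_n_bits for c in all_cfgs])  # [8, 4, 2, 16, 16, 16]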
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py
CHANGED
@@ -18,7 +18,8 @@ import model_compression_toolkit as mct
 import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
 from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, QNNPACK_TP_MODEL
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
+    Signedness, \
     AttributeQuantizationConfig, OpQuantizationConfig
 
 tp = mct.target_platform
@@ -138,8 +139,28 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = schema.QuantizationConfigOptions([default_config])
-
+    default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
+
+    # Combine operations/modules into a single module.
+    # Pytorch supports the next fusing patterns:
+    # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]
+    # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode
+    operator_set = []
+    fusing_patterns = []
+
+    conv = schema.OperatorsSet("Conv")
+    batchnorm = schema.OperatorsSet("BatchNorm")
+    relu = schema.OperatorsSet("Relu")
+    linear = schema.OperatorsSet("Linear")
+
+    operator_set.extend([conv, batchnorm, relu, linear])
+    # ------------------- #
+    # Fusions
+    # ------------------- #
+    fusing_patterns.append(schema.Fusing((conv, batchnorm, relu)))
+    fusing_patterns.append(schema.Fusing((conv, batchnorm)))
+    fusing_patterns.append(schema.Fusing((conv, relu)))
+    fusing_patterns.append(schema.Fusing((linear, relu)))
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):
@@ -148,27 +169,8 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         tpc_minor_version=1,
         tpc_patch_version=0,
         tpc_platform_type=QNNPACK_TP_MODEL,
+        operator_set=tuple(operator_set),
+        fusing_patterns=tuple(fusing_patterns),
         add_metadata=False,
         name=name)
-
-    # To start defining the model's components (such as operator sets, and fusing patterns),
-    # use 'with' the target platform model instance, and create them as below:
-    with generated_tpc:
-        # Combine operations/modules into a single module.
-        # Pytorch supports the next fusing patterns:
-        # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]
-        # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode
-        conv = schema.OperatorsSet("Conv")
-        batchnorm = schema.OperatorsSet("BatchNorm")
-        relu = schema.OperatorsSet("Relu")
-        linear = schema.OperatorsSet("Linear")
-
-        # ------------------- #
-        # Fusions
-        # ------------------- #
-        schema.Fusing([conv, batchnorm, relu])
-        schema.Fusing([conv, batchnorm])
-        schema.Fusing([conv, relu])
-        schema.Fusing([linear, relu])
-
     return generated_tpc
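The four patterns registered above are exactly the module chains that PyTorch eager-mode quantization can fuse: [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]. For reference, a short PyTorch sketch of the [Conv, BatchNorm, Relu] fusion that schema.Fusing((conv, batchnorm, relu)) declares at the TPC level; this snippet assumes torch is installed and is illustrative, not part of the diff:

import torch.nn as nn
from torch.ao.quantization import fuse_modules  # older torch: torch.quantization.fuse_modules

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 3)
        self.bn = nn.BatchNorm2d(8)
        self.relu = nn.ReLU()

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

m = Toy().eval()  # conv-bn folding requires eval mode
# Fuse the [Conv, BatchNorm, Relu] chain by module names.
fused = fuse_modules(m, [["conv", "bn", "relu"]])
print(type(fused.conv))  # a fused ConvReLU2d-style module; bn is folded away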
model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py
CHANGED
@@ -136,7 +136,61 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
     # If the QuantizationConfigOptions contains only one configuration,
     # this configuration will be used for the operation quantization:
-    default_configuration_options = schema.QuantizationConfigOptions([default_config])
+    default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
+
+    # In TFLite, the quantized operator specifications constraint operators quantization
+    # differently. For more details:
+    # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications
+    operator_set = []
+    fusing_patterns = []
+
+    operator_set.append(schema.OperatorsSet("NoQuantization",
+                                            default_configuration_options.clone_and_edit(
+                                                quantization_preserving=True)))
+
+    fc = schema.OperatorsSet("FullyConnected",
+                             default_configuration_options.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))
+
+    operator_set.append(schema.OperatorsSet("L2Normalization",
+                                            default_configuration_options.clone_and_edit(
+                                                fixed_zero_point=0, fixed_scale=1 / 128)))
+    operator_set.append(schema.OperatorsSet("LogSoftmax",
+                                            default_configuration_options.clone_and_edit(
+                                                fixed_zero_point=127, fixed_scale=16 / 256)))
+    operator_set.append(schema.OperatorsSet("Tanh",
+                                            default_configuration_options.clone_and_edit(
+                                                fixed_zero_point=0, fixed_scale=1 / 128)))
+    operator_set.append(schema.OperatorsSet("Softmax",
+                                            default_configuration_options.clone_and_edit(
+                                                fixed_zero_point=-128, fixed_scale=1 / 256)))
+    operator_set.append(schema.OperatorsSet("Logistic",
+                                            default_configuration_options.clone_and_edit(
+                                                fixed_zero_point=-128, fixed_scale=1 / 256)))
+
+    conv2d = schema.OperatorsSet("Conv2d")
+    kernel = schema.OperatorSetConcat([conv2d, fc])
+
+    relu = schema.OperatorsSet("Relu")
+    elu = schema.OperatorsSet("Elu")
+    activations_to_fuse = schema.OperatorSetConcat([relu, elu])
+
+    batch_norm = schema.OperatorsSet("BatchNorm")
+    bias_add = schema.OperatorsSet("BiasAdd")
+    add = schema.OperatorsSet("Add")
+    squeeze = schema.OperatorsSet("Squeeze",
+                                  qc_options=default_configuration_options.clone_and_edit(
+                                      quantization_preserving=True))
+    operator_set.extend([fc, conv2d, kernel, relu, elu, batch_norm, bias_add, add, squeeze])
+    # ------------------- #
+    # Fusions
+    # ------------------- #
+    # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper
+    fusing_patterns.append(schema.Fusing((kernel, bias_add)))
+    fusing_patterns.append(schema.Fusing((kernel, bias_add, activations_to_fuse)))
+    fusing_patterns.append(schema.Fusing((conv2d, batch_norm, activations_to_fuse)))
+    fusing_patterns.append(schema.Fusing((conv2d, squeeze, activations_to_fuse)))
+    fusing_patterns.append(schema.Fusing((batch_norm, activations_to_fuse)))
+    fusing_patterns.append(schema.Fusing((batch_norm, add, activations_to_fuse)))
 
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
@@ -145,62 +199,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
         default_configuration_options,
         tpc_minor_version=1,
         tpc_patch_version=0,
+        operator_set=tuple(operator_set),
+        fusing_patterns=tuple(fusing_patterns),
         tpc_platform_type=TFLITE_TP_MODEL,
         add_metadata=False,
         name=name)
 
-    # To start defining the model's components (such as operator sets, and fusing patterns),
-    # use 'with' the TargetPlatformModel instance, and create them as below:
-    with generated_tpc:
-        # In TFLite, the quantized operator specifications constraint operators quantization
-        # differently. For more details:
-        # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications
-        schema.OperatorsSet("NoQuantization",
-                            tp.get_default_quantization_config_options().clone_and_edit(
-                                quantization_preserving=True))
-
-        fc_qco = tp.get_default_quantization_config_options()
-        fc = schema.OperatorsSet("FullyConnected",
-                                 fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))
-
-        schema.OperatorsSet("L2Normalization",
-                            tp.get_default_quantization_config_options().clone_and_edit(
-                                fixed_zero_point=0, fixed_scale=1 / 128))
-        schema.OperatorsSet("LogSoftmax",
-                            tp.get_default_quantization_config_options().clone_and_edit(
-                                fixed_zero_point=127, fixed_scale=16 / 256))
-        schema.OperatorsSet("Tanh",
-                            tp.get_default_quantization_config_options().clone_and_edit(
-                                fixed_zero_point=0, fixed_scale=1 / 128))
-        schema.OperatorsSet("Softmax",
-                            tp.get_default_quantization_config_options().clone_and_edit(
-                                fixed_zero_point=-128, fixed_scale=1 / 256))
-        schema.OperatorsSet("Logistic",
-                            tp.get_default_quantization_config_options().clone_and_edit(
-                                fixed_zero_point=-128, fixed_scale=1 / 256))
-
-        conv2d = schema.OperatorsSet("Conv2d")
-        kernel = schema.OperatorSetConcat([conv2d, fc])
-
-        relu = schema.OperatorsSet("Relu")
-        elu = schema.OperatorsSet("Elu")
-        activations_to_fuse = schema.OperatorSetConcat([relu, elu])
-
-        batch_norm = schema.OperatorsSet("BatchNorm")
-        bias_add = schema.OperatorsSet("BiasAdd")
-        add = schema.OperatorsSet("Add")
-        squeeze = schema.OperatorsSet("Squeeze",
-                                      qc_options=tp.get_default_quantization_config_options().clone_and_edit(
-                                          quantization_preserving=True))
-        # ------------------- #
-        # Fusions
-        # ------------------- #
-        # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper
-        schema.Fusing([kernel, bias_add])
-        schema.Fusing([kernel, bias_add, activations_to_fuse])
-        schema.Fusing([conv2d, batch_norm, activations_to_fuse])
-        schema.Fusing([conv2d, squeeze, activations_to_fuse])
-        schema.Fusing([batch_norm, activations_to_fuse])
-        schema.Fusing([batch_norm, add, activations_to_fuse])
-
     return generated_tpc
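The fixed (scale, zero_point) pairs above come from the TFLite int8 operator specification: operators with a known output range get pinned quantization parameters instead of calibrated ones. Under the dequantization rule real = scale * (q - zero_point) for int8 q in [-128, 127], the pinned pairs reproduce the expected ranges; a quick arithmetic check (plain Python, independent of MCT):

# Dequantization rule: real = scale * (q - zero_point)
def dequant_range(scale, zero_point, qmin=-128, qmax=127):
    return scale * (qmin - zero_point), scale * (qmax - zero_point)

print(dequant_range(1 / 256, -128))   # Softmax/Logistic: (0.0, 0.99609375), i.e. ~[0, 1)
print(dequant_range(1 / 128, 0))      # Tanh/L2Normalization: (-1.0, 0.9921875), i.e. ~[-1, 1)
print(dequant_range(16 / 256, 127))   # LogSoftmax: (-15.9375, 0.0)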
model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py
DELETED
@@ -1,67 +0,0 @@
-# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-from model_compression_toolkit.logger import Logger
-
-def get_current_tp_model():
-    """
-
-    Returns: The current TargetPlatformModel that is being used and accessed.
-
-    """
-    return _current_tp_model.get()
-
-
-class CurrentTPModel:
-    """
-    Wrapper of the current TargetPlatformModel object that is being accessed and defined.
-    """
-
-    def __init__(self):
-        super(CurrentTPModel, self).__init__()
-        self.tp_model = None
-
-    def get(self):
-        """
-
-        Returns: The current TargetPlatformModel that is being defined.
-
-        """
-        if self.tp_model is None:
-            Logger.critical('Target platform model is not initialized.') # pragma: no cover
-        return self.tp_model
-
-    def reset(self):
-        """
-
-        Reset the current TargetPlatformModel so a new TargetPlatformModel can be wrapped and
-        used as the current TargetPlatformModel object.
-
-        """
-        self.tp_model = None
-
-    def set(self, tp_model):
-        """
-        Set and wrap a TargetPlatformModel as the current TargetPlatformModel.
-
-        Args:
-            tp_model: TargetPlatformModel to set as the current TargetPlatformModel to access and use.
-
-        """
-        self.tp_model = tp_model
-
-
-# Use a single instance for the current model.
-_current_tp_model = CurrentTPModel()