mct-nightly 2.2.0.20241230.534__py3-none-any.whl → 2.2.0.20250102.111338__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241230.534.dist-info → mct_nightly-2.2.0.20250102.111338.dist-info}/METADATA +8 -11
- {mct_nightly-2.2.0.20241230.534.dist-info → mct_nightly-2.2.0.20250102.111338.dist-info}/RECORD +19 -19
- {mct_nightly-2.2.0.20241230.534.dist-info → mct_nightly-2.2.0.20250102.111338.dist-info}/WHEEL +1 -1
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +308 -173
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +22 -22
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +22 -22
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +22 -22
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +21 -21
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +22 -22
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +25 -25
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +23 -23
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +55 -40
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +4 -6
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +2 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +10 -10
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +49 -46
- {mct_nightly-2.2.0.20241230.534.dist-info → mct_nightly-2.2.0.20250102.111338.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241230.534.dist-info → mct_nightly-2.2.0.20250102.111338.dist-info}/top_level.txt +0 -0
@@ -29,6 +29,7 @@ OPSET_NO_QUANTIZATION = "NoQuantization"
|
|
29
29
|
OPSET_QUANTIZATION_PRESERVING = "QuantizationPreserving"
|
30
30
|
OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS = "DimensionManipulationOpsWithWeights"
|
31
31
|
OPSET_DIMENSION_MANIPULATION_OPS = "DimensionManipulationOps"
|
32
|
+
OPSET_SPLIT_OPS = "SplitOps"
|
32
33
|
OPSET_MERGE_OPS = "MergeOps"
|
33
34
|
OPSET_CONV = "Conv"
|
34
35
|
OPSET_FULLY_CONNECTED = "FullyConnected"
|
@@ -178,14 +179,24 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
178
179
|
# of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
|
179
180
|
# If the QuantizationConfigOptions contains only one configuration,
|
180
181
|
# this configuration will be used for the operation quantization:
|
181
|
-
default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
|
182
|
+
default_configuration_options = schema.QuantizationConfigOptions(quantization_configurations=tuple([default_config]))
|
182
183
|
default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16))
|
183
|
-
default_config_options_16bit = schema.QuantizationConfigOptions(tuple([default_config_input16,
|
184
|
+
default_config_options_16bit = schema.QuantizationConfigOptions(quantization_configurations=tuple([default_config_input16,
|
184
185
|
default_config_input16.clone_and_edit(
|
185
186
|
activation_n_bits=16,
|
186
187
|
signedness=Signedness.SIGNED)]),
|
187
188
|
base_config=default_config_input16)
|
188
189
|
|
190
|
+
qpreseving_config = default_config.clone_and_edit(enable_activation_quantization=False,
|
191
|
+
quantization_preserving=True,
|
192
|
+
supported_input_activation_n_bits=(8, 16))
|
193
|
+
|
194
|
+
qpreseving_config_options = schema.QuantizationConfigOptions(quantization_configurations=tuple([qpreseving_config,
|
195
|
+
qpreseving_config.clone_and_edit(
|
196
|
+
activation_n_bits=16,
|
197
|
+
signedness=Signedness.SIGNED)]),
|
198
|
+
base_config=qpreseving_config)
|
199
|
+
|
189
200
|
# Create a QuantizationConfigOptions for quantizing constants in functional ops.
|
190
201
|
# Constant configuration is similar to the default eight bit configuration except for PoT
|
191
202
|
# quantization method for the constant.
|
@@ -195,7 +206,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
195
206
|
default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
|
196
207
|
enable_weights_quantization=True, weights_per_channel_threshold=True,
|
197
208
|
weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
|
198
|
-
const_configuration_options = schema.QuantizationConfigOptions(tuple([const_config]))
|
209
|
+
const_configuration_options = schema.QuantizationConfigOptions(quantization_configurations=tuple([const_config]))
|
199
210
|
|
200
211
|
# 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that
|
201
212
|
# support 16 bit as input and output.
|
@@ -203,9 +214,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
203
214
|
supported_input_activation_n_bits=(8, 16))
|
204
215
|
const_config_input16_output16 = const_config_input16.clone_and_edit(
|
205
216
|
activation_n_bits=16, signedness=Signedness.SIGNED)
|
206
|
-
const_configuration_options_inout16 = schema.QuantizationConfigOptions(
|
207
|
-
|
208
|
-
|
217
|
+
const_configuration_options_inout16 = schema.QuantizationConfigOptions(
|
218
|
+
quantization_configurations=tuple([const_config_input16_output16,
|
219
|
+
const_config_input16]),
|
220
|
+
base_config=const_config_input16)
|
209
221
|
|
210
222
|
const_config_input16_per_tensor = const_config.clone_and_edit(
|
211
223
|
supported_input_activation_n_bits=(8, 16),
|
@@ -215,7 +227,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
215
227
|
)
|
216
228
|
const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit(
|
217
229
|
activation_n_bits=16, signedness=Signedness.SIGNED)
|
218
|
-
const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(tuple(
|
230
|
+
const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(quantization_configurations=tuple(
|
219
231
|
[const_config_input16_output16_per_tensor,
|
220
232
|
const_config_input16_per_tensor]),
|
221
233
|
base_config=const_config_input16_per_tensor)
|
@@ -224,13 +236,13 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
224
236
|
quantization_preserving=True,
|
225
237
|
default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit(
|
226
238
|
weights_per_channel_threshold=False))
|
227
|
-
qpreserving_const_config_options = schema.QuantizationConfigOptions(tuple([qpreserving_const_config]))
|
239
|
+
qpreserving_const_config_options = schema.QuantizationConfigOptions(quantization_configurations=tuple([qpreserving_const_config]))
|
228
240
|
|
229
241
|
mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)
|
230
242
|
for mp_cfg in mixed_precision_cfg_list]
|
231
243
|
|
232
244
|
# Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
|
233
|
-
mixed_precision_configuration_options = schema.QuantizationConfigOptions(tuple(
|
245
|
+
mixed_precision_configuration_options = schema.QuantizationConfigOptions(quantization_configurations=tuple(
|
234
246
|
mixed_precision_cfg_list + mp_cfg_list_16bit),
|
235
247
|
base_config=base_config)
|
236
248
|
|
@@ -242,69 +254,72 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
242
254
|
operator_set = []
|
243
255
|
fusing_patterns = []
|
244
256
|
# May suit for operations like: Dropout, Reshape, etc.
|
245
|
-
operator_set.append(schema.OperatorsSet(OPSET_NO_QUANTIZATION,
|
246
|
-
default_configuration_options.clone_and_edit(
|
257
|
+
operator_set.append(schema.OperatorsSet(name=OPSET_NO_QUANTIZATION,
|
258
|
+
qc_options=default_configuration_options.clone_and_edit(
|
247
259
|
enable_activation_quantization=False)
|
248
260
|
.clone_and_edit_weight_attribute(enable_weights_quantization=False)))
|
249
|
-
operator_set.append(schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
|
250
|
-
default_configuration_options.clone_and_edit(
|
261
|
+
operator_set.append(schema.OperatorsSet(name=OPSET_QUANTIZATION_PRESERVING,
|
262
|
+
qc_options=default_configuration_options.clone_and_edit(
|
251
263
|
enable_activation_quantization=False,
|
252
264
|
quantization_preserving=True)
|
253
265
|
.clone_and_edit_weight_attribute(enable_weights_quantization=False)))
|
254
266
|
operator_set.append(
|
255
|
-
schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS,
|
256
|
-
|
257
|
-
|
267
|
+
schema.OperatorsSet(name=OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS,
|
268
|
+
qc_options=qpreserving_const_config_options))
|
269
|
+
operator_set.append(schema.OperatorsSet(name=OPSET_DIMENSION_MANIPULATION_OPS,
|
270
|
+
qc_options=default_configuration_options.clone_and_edit(
|
258
271
|
enable_activation_quantization=False,
|
259
272
|
quantization_preserving=True,
|
260
273
|
supported_input_activation_n_bits=(8, 16))
|
261
274
|
.clone_and_edit_weight_attribute(enable_weights_quantization=False)))
|
262
|
-
|
275
|
+
|
276
|
+
operator_set.append(schema.OperatorsSet(name=OPSET_SPLIT_OPS, qc_options=qpreseving_config_options))
|
277
|
+
operator_set.append(schema.OperatorsSet(name=OPSET_MERGE_OPS, qc_options=const_configuration_options_inout16_per_tensor))
|
263
278
|
|
264
279
|
# Define operator sets that use mixed_precision_configuration_options:
|
265
|
-
conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
|
266
|
-
fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
|
280
|
+
conv = schema.OperatorsSet(name=OPSET_CONV, qc_options=mixed_precision_configuration_options)
|
281
|
+
fc = schema.OperatorsSet(name=OPSET_FULLY_CONNECTED, qc_options=mixed_precision_configuration_options)
|
267
282
|
|
268
|
-
operator_set.append(schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit))
|
283
|
+
operator_set.append(schema.OperatorsSet(name=OPSET_BATCH_NORM, qc_options=default_config_options_16bit))
|
269
284
|
|
270
285
|
# Note: Operations sets without quantization configuration are useful for creating fusing patterns
|
271
|
-
any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
|
272
|
-
add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
|
273
|
-
sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
|
274
|
-
mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
|
275
|
-
div = schema.OperatorsSet(OPSET_DIV, const_configuration_options)
|
276
|
-
min_max = schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
|
277
|
-
prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
|
278
|
-
swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
|
279
|
-
sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
|
280
|
-
tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit)
|
281
|
-
gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit)
|
282
|
-
hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
|
283
|
-
hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
|
286
|
+
any_relu = schema.OperatorsSet(name=OPSET_ANY_RELU, qc_options=default_config_options_16bit)
|
287
|
+
add = schema.OperatorsSet(name=OPSET_ADD, qc_options=const_configuration_options_inout16)
|
288
|
+
sub = schema.OperatorsSet(name=OPSET_SUB, qc_options=const_configuration_options_inout16)
|
289
|
+
mul = schema.OperatorsSet(name=OPSET_MUL, qc_options=const_configuration_options_inout16)
|
290
|
+
div = schema.OperatorsSet(name=OPSET_DIV, qc_options=const_configuration_options)
|
291
|
+
min_max = schema.OperatorsSet(name=OPSET_MIN_MAX, qc_options=const_configuration_options_inout16)
|
292
|
+
prelu = schema.OperatorsSet(name=OPSET_PRELU, qc_options=default_config_options_16bit)
|
293
|
+
swish = schema.OperatorsSet(name=OPSET_SWISH, qc_options=default_config_options_16bit)
|
294
|
+
sigmoid = schema.OperatorsSet(name=OPSET_SIGMOID, qc_options=default_config_options_16bit)
|
295
|
+
tanh = schema.OperatorsSet(name=OPSET_TANH, qc_options=default_config_options_16bit)
|
296
|
+
gelu = schema.OperatorsSet(name=OPSET_GELU, qc_options=default_config_options_16bit)
|
297
|
+
hardsigmoid = schema.OperatorsSet(name=OPSET_HARDSIGMOID, qc_options=default_config_options_16bit)
|
298
|
+
hardswish = schema.OperatorsSet(name=OPSET_HARDSWISH, qc_options=default_config_options_16bit)
|
284
299
|
|
285
300
|
operator_set.extend(
|
286
301
|
[conv, fc, any_relu, add, sub, mul, div, prelu, swish, sigmoid, tanh, min_max, gelu, hardsigmoid, hardswish])
|
287
302
|
# Combine multiple operators into a single operator to avoid quantization between
|
288
303
|
# them. To do this we define fusing patterns using the OperatorsSets that were created.
|
289
304
|
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created
|
290
|
-
activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid,
|
305
|
+
activations_after_conv_to_fuse = schema.OperatorSetConcat(operators_set=[any_relu, swish, prelu, sigmoid,
|
291
306
|
tanh, gelu, hardswish, hardsigmoid])
|
292
|
-
activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid, tanh, gelu,
|
307
|
+
activations_after_fc_to_fuse = schema.OperatorSetConcat(operators_set=[any_relu, swish, sigmoid, tanh, gelu,
|
293
308
|
hardswish, hardsigmoid])
|
294
|
-
any_binary = schema.OperatorSetConcat([add, sub, mul, div])
|
309
|
+
any_binary = schema.OperatorSetConcat(operators_set=[add, sub, mul, div])
|
295
310
|
|
296
311
|
# ------------------- #
|
297
312
|
# Fusions
|
298
313
|
# ------------------- #
|
299
|
-
fusing_patterns.append(schema.Fusing((conv, activations_after_conv_to_fuse)))
|
300
|
-
fusing_patterns.append(schema.Fusing((fc, activations_after_fc_to_fuse)))
|
301
|
-
fusing_patterns.append(schema.Fusing((any_binary, any_relu)))
|
314
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(conv, activations_after_conv_to_fuse)))
|
315
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(fc, activations_after_fc_to_fuse)))
|
316
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(any_binary, any_relu)))
|
302
317
|
|
303
318
|
# Create a TargetPlatformModel and set its default quantization config.
|
304
319
|
# This default configuration will be used for all operations
|
305
320
|
# unless specified otherwise (see OperatorsSet, for example):
|
306
321
|
generated_tpm = schema.TargetPlatformModel(
|
307
|
-
default_configuration_options,
|
322
|
+
default_qco=default_configuration_options,
|
308
323
|
tpc_minor_version=4,
|
309
324
|
tpc_patch_version=0,
|
310
325
|
tpc_platform_type=IMX500_TP_MODEL,
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py
CHANGED
@@ -39,7 +39,8 @@ from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tp
|
|
39
39
|
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tp_model import OPSET_NO_QUANTIZATION, \
|
40
40
|
OPSET_QUANTIZATION_PRESERVING, OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, OPSET_DIMENSION_MANIPULATION_OPS, \
|
41
41
|
OPSET_MERGE_OPS, OPSET_CONV, OPSET_FULLY_CONNECTED, OPSET_ANY_RELU, OPSET_ADD, OPSET_SUB, OPSET_MUL, OPSET_DIV, \
|
42
|
-
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH, OPSET_GELU, OPSET_BATCH_NORM, OPSET_MIN_MAX, OPSET_HARDSIGMOID
|
42
|
+
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH, OPSET_GELU, OPSET_BATCH_NORM, OPSET_MIN_MAX, OPSET_HARDSIGMOID, \
|
43
|
+
OPSET_SPLIT_OPS
|
43
44
|
|
44
45
|
tp = mct.target_platform
|
45
46
|
|
@@ -78,11 +79,7 @@ def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
|
78
79
|
ZeroPadding2D,
|
79
80
|
Dropout,
|
80
81
|
MaxPooling2D,
|
81
|
-
tf.
|
82
|
-
tf.cast,
|
83
|
-
tf.unstack,
|
84
|
-
tf.__operators__.getitem,
|
85
|
-
tf.strided_slice]
|
82
|
+
tf.cast]
|
86
83
|
quantization_preserving_list_16bit_input = [Reshape,
|
87
84
|
tf.reshape,
|
88
85
|
Permute,
|
@@ -97,6 +94,7 @@ def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
|
97
94
|
tp.OperationsSetToLayers(OPSET_QUANTIZATION_PRESERVING, quantization_preserving)
|
98
95
|
tp.OperationsSetToLayers(OPSET_DIMENSION_MANIPULATION_OPS, quantization_preserving_list_16bit_input)
|
99
96
|
tp.OperationsSetToLayers(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, [tf.gather, tf.compat.v1.gather])
|
97
|
+
tp.OperationsSetToLayers(OPSET_SPLIT_OPS,[tf.unstack, tf.split, tf.strided_slice, tf.__operators__.getitem])
|
100
98
|
tp.OperationsSetToLayers(OPSET_MERGE_OPS, [tf.stack, tf.concat, Concatenate])
|
101
99
|
tp.OperationsSetToLayers(OPSET_CONV,
|
102
100
|
[Conv2D,
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py
CHANGED
@@ -36,7 +36,7 @@ from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tp
|
|
36
36
|
OPSET_QUANTIZATION_PRESERVING, OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, OPSET_DIMENSION_MANIPULATION_OPS, \
|
37
37
|
OPSET_MERGE_OPS, OPSET_CONV, OPSET_FULLY_CONNECTED, OPSET_ANY_RELU, OPSET_ADD, OPSET_SUB, OPSET_MUL, OPSET_DIV, \
|
38
38
|
OPSET_PRELU, OPSET_SWISH, OPSET_SIGMOID, OPSET_TANH, OPSET_GELU, OPSET_BATCH_NORM, OPSET_MIN_MAX, OPSET_HARDSIGMOID, \
|
39
|
-
OPSET_HARDSWISH
|
39
|
+
OPSET_HARDSWISH, OPSET_SPLIT_OPS
|
40
40
|
|
41
41
|
tp = mct.target_platform
|
42
42
|
|
@@ -77,9 +77,6 @@ def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
|
77
77
|
topk])
|
78
78
|
tp.OperationsSetToLayers(OPSET_QUANTIZATION_PRESERVING, [Dropout,
|
79
79
|
dropout,
|
80
|
-
split,
|
81
|
-
chunk,
|
82
|
-
unbind,
|
83
80
|
MaxPool2d])
|
84
81
|
tp.OperationsSetToLayers(OPSET_DIMENSION_MANIPULATION_OPS, [Flatten,
|
85
82
|
flatten,
|
@@ -90,6 +87,7 @@ def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
|
90
87
|
permute,
|
91
88
|
transpose])
|
92
89
|
tp.OperationsSetToLayers(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, [gather, torch.Tensor.expand])
|
90
|
+
tp.OperationsSetToLayers(OPSET_SPLIT_OPS,[split, chunk, unbind])
|
93
91
|
tp.OperationsSetToLayers(OPSET_MERGE_OPS,
|
94
92
|
[torch.stack, torch.cat, torch.concat, torch.concatenate])
|
95
93
|
|
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py
CHANGED
@@ -139,7 +139,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
139
139
|
# of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
|
140
140
|
# If the QuantizationConfigOptions contains only one configuration,
|
141
141
|
# this configuration will be used for the operation quantization:
|
142
|
-
default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
|
142
|
+
default_configuration_options = schema.QuantizationConfigOptions(quantization_configurations=tuple([default_config]))
|
143
143
|
|
144
144
|
# Combine operations/modules into a single module.
|
145
145
|
# Pytorch supports the next fusing patterns:
|
@@ -148,24 +148,24 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
148
148
|
operator_set = []
|
149
149
|
fusing_patterns = []
|
150
150
|
|
151
|
-
conv = schema.OperatorsSet("Conv")
|
152
|
-
batchnorm = schema.OperatorsSet("BatchNorm")
|
153
|
-
relu = schema.OperatorsSet("Relu")
|
154
|
-
linear = schema.OperatorsSet("Linear")
|
151
|
+
conv = schema.OperatorsSet(name="Conv")
|
152
|
+
batchnorm = schema.OperatorsSet(name="BatchNorm")
|
153
|
+
relu = schema.OperatorsSet(name="Relu")
|
154
|
+
linear = schema.OperatorsSet(name="Linear")
|
155
155
|
|
156
156
|
operator_set.extend([conv, batchnorm, relu, linear])
|
157
157
|
# ------------------- #
|
158
158
|
# Fusions
|
159
159
|
# ------------------- #
|
160
|
-
fusing_patterns.append(schema.Fusing((conv, batchnorm, relu)))
|
161
|
-
fusing_patterns.append(schema.Fusing((conv, batchnorm)))
|
162
|
-
fusing_patterns.append(schema.Fusing((conv, relu)))
|
163
|
-
fusing_patterns.append(schema.Fusing((linear, relu)))
|
160
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(conv, batchnorm, relu)))
|
161
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(conv, batchnorm)))
|
162
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(conv, relu)))
|
163
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(linear, relu)))
|
164
164
|
# Create a TargetPlatformModel and set its default quantization config.
|
165
165
|
# This default configuration will be used for all operations
|
166
166
|
# unless specified otherwise (see OperatorsSet, for example):
|
167
167
|
generated_tpc = schema.TargetPlatformModel(
|
168
|
-
default_configuration_options,
|
168
|
+
default_qco=default_configuration_options,
|
169
169
|
tpc_minor_version=1,
|
170
170
|
tpc_patch_version=0,
|
171
171
|
tpc_platform_type=QNNPACK_TP_MODEL,
|
@@ -18,7 +18,8 @@ import model_compression_toolkit as mct
|
|
18
18
|
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
19
19
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR, TFLITE_TP_MODEL
|
21
|
-
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel,
|
21
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
|
22
|
+
Signedness, \
|
22
23
|
AttributeQuantizationConfig, OpQuantizationConfig
|
23
24
|
|
24
25
|
tp = mct.target_platform
|
@@ -136,7 +137,8 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
136
137
|
# of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
|
137
138
|
# If the QuantizationConfigOptions contains only one configuration,
|
138
139
|
# this configuration will be used for the operation quantization:
|
139
|
-
default_configuration_options = schema.QuantizationConfigOptions(
|
140
|
+
default_configuration_options = schema.QuantizationConfigOptions(
|
141
|
+
quantization_configurations=tuple([default_config]))
|
140
142
|
|
141
143
|
# In TFLite, the quantized operator specifications constraint operators quantization
|
142
144
|
# differently. For more details:
|
@@ -144,59 +146,60 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
144
146
|
operator_set = []
|
145
147
|
fusing_patterns = []
|
146
148
|
|
147
|
-
operator_set.append(schema.OperatorsSet("NoQuantization",
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
fc = schema.OperatorsSet("FullyConnected",
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
149
|
+
operator_set.append(schema.OperatorsSet(name="NoQuantization",
|
150
|
+
qc_options=default_configuration_options.clone_and_edit(
|
151
|
+
quantization_preserving=True)))
|
152
|
+
|
153
|
+
fc = schema.OperatorsSet(name="FullyConnected",
|
154
|
+
qc_options=default_configuration_options.clone_and_edit_weight_attribute(
|
155
|
+
weights_per_channel_threshold=False))
|
156
|
+
|
157
|
+
operator_set.append(schema.OperatorsSet(name="L2Normalization",
|
158
|
+
qc_options=default_configuration_options.clone_and_edit(
|
159
|
+
fixed_zero_point=0, fixed_scale=1 / 128)))
|
160
|
+
operator_set.append(schema.OperatorsSet(name="LogSoftmax",
|
161
|
+
qc_options=default_configuration_options.clone_and_edit(
|
162
|
+
fixed_zero_point=127, fixed_scale=16 / 256)))
|
163
|
+
operator_set.append(schema.OperatorsSet(name="Tanh",
|
164
|
+
qc_options=default_configuration_options.clone_and_edit(
|
165
|
+
fixed_zero_point=0, fixed_scale=1 / 128)))
|
166
|
+
operator_set.append(schema.OperatorsSet(name="Softmax",
|
167
|
+
qc_options=default_configuration_options.clone_and_edit(
|
168
|
+
fixed_zero_point=-128, fixed_scale=1 / 256)))
|
169
|
+
operator_set.append(schema.OperatorsSet(name="Logistic",
|
170
|
+
qc_options=default_configuration_options.clone_and_edit(
|
171
|
+
fixed_zero_point=-128, fixed_scale=1 / 256)))
|
172
|
+
|
173
|
+
conv2d = schema.OperatorsSet(name="Conv2d")
|
174
|
+
kernel = schema.OperatorSetConcat(operators_set=[conv2d, fc])
|
175
|
+
|
176
|
+
relu = schema.OperatorsSet(name="Relu")
|
177
|
+
elu = schema.OperatorsSet(name="Elu")
|
178
|
+
activations_to_fuse = schema.OperatorSetConcat(operators_set=[relu, elu])
|
179
|
+
|
180
|
+
batch_norm = schema.OperatorsSet(name="BatchNorm")
|
181
|
+
bias_add = schema.OperatorsSet(name="BiasAdd")
|
182
|
+
add = schema.OperatorsSet(name="Add")
|
183
|
+
squeeze = schema.OperatorsSet(name="Squeeze",
|
184
|
+
qc_options=default_configuration_options.clone_and_edit(
|
185
|
+
quantization_preserving=True))
|
186
|
+
operator_set.extend([fc, conv2d, relu, elu, batch_norm, bias_add, add, squeeze])
|
184
187
|
# ------------------- #
|
185
188
|
# Fusions
|
186
189
|
# ------------------- #
|
187
190
|
# Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper
|
188
|
-
fusing_patterns.append(schema.Fusing((kernel, bias_add)))
|
189
|
-
fusing_patterns.append(schema.Fusing((kernel, bias_add, activations_to_fuse)))
|
190
|
-
fusing_patterns.append(schema.Fusing((conv2d, batch_norm, activations_to_fuse)))
|
191
|
-
fusing_patterns.append(schema.Fusing((conv2d, squeeze, activations_to_fuse)))
|
192
|
-
fusing_patterns.append(schema.Fusing((batch_norm, activations_to_fuse)))
|
193
|
-
fusing_patterns.append(schema.Fusing((batch_norm, add, activations_to_fuse)))
|
191
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(kernel, bias_add)))
|
192
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(kernel, bias_add, activations_to_fuse)))
|
193
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(conv2d, batch_norm, activations_to_fuse)))
|
194
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(conv2d, squeeze, activations_to_fuse)))
|
195
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(batch_norm, activations_to_fuse)))
|
196
|
+
fusing_patterns.append(schema.Fusing(operator_groups=(batch_norm, add, activations_to_fuse)))
|
194
197
|
|
195
198
|
# Create a TargetPlatformModel and set its default quantization config.
|
196
199
|
# This default configuration will be used for all operations
|
197
200
|
# unless specified otherwise (see OperatorsSet, for example):
|
198
201
|
generated_tpc = schema.TargetPlatformModel(
|
199
|
-
default_configuration_options,
|
202
|
+
default_qco=default_configuration_options,
|
200
203
|
tpc_minor_version=1,
|
201
204
|
tpc_patch_version=0,
|
202
205
|
operator_set=tuple(operator_set),
|
{mct_nightly-2.2.0.20241230.534.dist-info → mct_nightly-2.2.0.20250102.111338.dist-info}/LICENSE.md
RENAMED
File without changes
|
File without changes
|