mct-nightly 2.2.0.20241222.533__py3-none-any.whl → 2.2.0.20241223.525__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/RECORD +26 -28
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/common/graph/base_graph.py +1 -1
  5. model_compression_toolkit/core/common/graph/base_node.py +3 -3
  6. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +4 -4
  7. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +2 -2
  8. model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +1 -0
  9. model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +4 -5
  10. model_compression_toolkit/target_platform_capabilities/schema/v1.py +63 -170
  11. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -1
  12. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +1 -1
  13. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +7 -4
  14. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +50 -51
  15. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +54 -52
  16. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +57 -53
  17. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +52 -51
  18. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +53 -51
  19. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +59 -57
  20. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +54 -52
  21. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +90 -83
  22. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +26 -24
  23. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +57 -55
  24. model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +0 -67
  25. model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +0 -30
  26. {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/LICENSE.md +0 -0
  27. {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/WHEEL +0 -0
  28. {mct_nightly-2.2.0.20241222.533.dist-info → mct_nightly-2.2.0.20241223.525.dist-info}/top_level.txt +0 -0
@@ -19,7 +19,8 @@ import model_compression_toolkit.target_platform_capabilities.schema.mct_current
19
19
  from model_compression_toolkit.constants import FLOAT_BITWIDTH
20
20
  from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
21
21
  WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL
22
- from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \
22
+ from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
23
+ Signedness, \
23
24
  AttributeQuantizationConfig, OpQuantizationConfig
24
25
 
25
26
  tp = mct.target_platform
@@ -152,7 +153,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
152
153
  # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
153
154
  # If the QuantizationConfigOptions contains only one configuration,
154
155
  # this configuration will be used for the operation quantization:
155
- default_configuration_options = schema.QuantizationConfigOptions([default_config])
156
+ default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
156
157
 
157
158
  # Create a QuantizationConfigOptions for quantizing constants in functional ops.
158
159
  # Constant configuration is similar to the default eight bit configuration except for PoT
@@ -163,7 +164,55 @@ def generate_tp_model(default_config: OpQuantizationConfig,
163
164
  default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
164
165
  enable_weights_quantization=True, weights_per_channel_threshold=True,
165
166
  weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
166
- const_configuration_options = schema.QuantizationConfigOptions([const_config])
167
+ const_configuration_options = schema.QuantizationConfigOptions(tuple([const_config]))
168
+
169
+ # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
170
+ mixed_precision_configuration_options = schema.QuantizationConfigOptions(tuple(mixed_precision_cfg_list),
171
+ base_config=base_config)
172
+
173
+ # Create an OperatorsSet to represent a set of operations.
174
+ # Each OperatorsSet has a unique label.
175
+ # If a quantization configuration options is passed, these options will
176
+ # be used for operations that will be attached to this set's label.
177
+ # Otherwise, it will be a configure-less set (used in fusing):
178
+ operator_set = []
179
+ fusing_patterns = []
180
+ # May suit for operations like: Dropout, Reshape, etc.
181
+ operator_set.append(schema.OperatorsSet("NoQuantization",
182
+ default_configuration_options.clone_and_edit(
183
+ enable_activation_quantization=False)
184
+ .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
185
+
186
+ # Define operator sets that use mixed_precision_configuration_options:
187
+ conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
188
+ fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
189
+
190
+ # Define operations sets without quantization configuration
191
+ # options (useful for creating fusing patterns, for example):
192
+ any_relu = schema.OperatorsSet("AnyReLU")
193
+ add = schema.OperatorsSet("Add", const_configuration_options)
194
+ sub = schema.OperatorsSet("Sub", const_configuration_options)
195
+ mul = schema.OperatorsSet("Mul", const_configuration_options)
196
+ div = schema.OperatorsSet("Div", const_configuration_options)
197
+ prelu = schema.OperatorsSet("PReLU")
198
+ swish = schema.OperatorsSet("Swish")
199
+ sigmoid = schema.OperatorsSet("Sigmoid")
200
+ tanh = schema.OperatorsSet("Tanh")
201
+
202
+ operator_set.extend([conv, fc, any_relu, add, sub, mul, div, prelu, swish, sigmoid, tanh])
203
+ # Combine multiple operators into a single operator to avoid quantization between
204
+ # them. To do this we define fusing patterns using the OperatorsSets that were created.
205
+ # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
206
+ activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid, tanh])
207
+ activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid])
208
+ any_binary = schema.OperatorSetConcat([add, sub, mul, div])
209
+
210
+ # ------------------- #
211
+ # Fusions
212
+ # ------------------- #
213
+ fusing_patterns.append(schema.Fusing((conv, activations_after_conv_to_fuse)))
214
+ fusing_patterns.append(schema.Fusing((fc, activations_after_fc_to_fuse)))
215
+ fusing_patterns.append(schema.Fusing((any_binary, any_relu)))
167
216
 
168
217
  # Create a TargetPlatformModel and set its default quantization config.
169
218
  # This default configuration will be used for all operations
@@ -173,56 +222,9 @@ def generate_tp_model(default_config: OpQuantizationConfig,
173
222
  tpc_minor_version=3,
174
223
  tpc_patch_version=0,
175
224
  tpc_platform_type=IMX500_TP_MODEL,
225
+ operator_set=tuple(operator_set),
226
+ fusing_patterns=tuple(fusing_patterns),
176
227
  add_metadata=True,
177
228
  name=name)
178
229
 
179
- # To start defining the model's components (such as operator sets, and fusing patterns),
180
- # use 'with' the TargetPlatformModel instance, and create them as below:
181
- with generated_tpm:
182
- # Create an OperatorsSet to represent a set of operations.
183
- # Each OperatorsSet has a unique label.
184
- # If a quantization configuration options is passed, these options will
185
- # be used for operations that will be attached to this set's label.
186
- # Otherwise, it will be a configure-less set (used in fusing):
187
-
188
- # May suit for operations like: Dropout, Reshape, etc.
189
- default_qco = tp.get_default_quantization_config_options()
190
- schema.OperatorsSet("NoQuantization",
191
- default_qco.clone_and_edit(enable_activation_quantization=False)
192
- .clone_and_edit_weight_attribute(enable_weights_quantization=False))
193
-
194
- # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
195
- mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
196
- base_config=base_config)
197
-
198
- # Define operator sets that use mixed_precision_configuration_options:
199
- conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
200
- fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
201
-
202
- # Define operations sets without quantization configuration
203
- # options (useful for creating fusing patterns, for example):
204
- any_relu = schema.OperatorsSet("AnyReLU")
205
- add = schema.OperatorsSet("Add", const_configuration_options)
206
- sub = schema.OperatorsSet("Sub", const_configuration_options)
207
- mul = schema.OperatorsSet("Mul", const_configuration_options)
208
- div = schema.OperatorsSet("Div", const_configuration_options)
209
- prelu = schema.OperatorsSet("PReLU")
210
- swish = schema.OperatorsSet("Swish")
211
- sigmoid = schema.OperatorsSet("Sigmoid")
212
- tanh = schema.OperatorsSet("Tanh")
213
-
214
- # Combine multiple operators into a single operator to avoid quantization between
215
- # them. To do this we define fusing patterns using the OperatorsSets that were created.
216
- # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
217
- activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid, tanh])
218
- activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid])
219
- any_binary = schema.OperatorSetConcat([add, sub, mul, div])
220
-
221
- # ------------------- #
222
- # Fusions
223
- # ------------------- #
224
- schema.Fusing([conv, activations_after_conv_to_fuse])
225
- schema.Fusing([fc, activations_after_fc_to_fuse])
226
- schema.Fusing([any_binary, any_relu])
227
-
228
230
  return generated_tpm
@@ -19,7 +19,8 @@ import model_compression_toolkit.target_platform_capabilities.schema.v1 as schem
19
19
  from model_compression_toolkit.constants import FLOAT_BITWIDTH
20
20
  from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
21
21
  IMX500_TP_MODEL
22
- from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \
22
+ from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
23
+ Signedness, \
23
24
  AttributeQuantizationConfig, OpQuantizationConfig
24
25
 
25
26
  tp = mct.target_platform
@@ -87,7 +88,8 @@ def get_op_quantization_configs() -> \
87
88
  weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
88
89
  weights_n_bits=8,
89
90
  weights_per_channel_threshold=False,
90
- enable_weights_quantization=False, # TODO: this will changed to True once implementing multi-attributes quantization
91
+ enable_weights_quantization=False,
92
+ # TODO: this will changed to True once implementing multi-attributes quantization
91
93
  lut_values_bitwidth=None)
92
94
 
93
95
  # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
@@ -176,13 +178,13 @@ def generate_tp_model(default_config: OpQuantizationConfig,
176
178
  # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
177
179
  # If the QuantizationConfigOptions contains only one configuration,
178
180
  # this configuration will be used for the operation quantization:
179
- default_configuration_options = schema.QuantizationConfigOptions([default_config])
181
+ default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
180
182
  default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16))
181
- default_config_options_16bit = schema.QuantizationConfigOptions([default_config_input16,
182
- default_config_input16.clone_and_edit(
183
- activation_n_bits=16,
184
- signedness=Signedness.SIGNED)],
185
- base_config=default_config_input16)
183
+ default_config_options_16bit = schema.QuantizationConfigOptions(tuple([default_config_input16,
184
+ default_config_input16.clone_and_edit(
185
+ activation_n_bits=16,
186
+ signedness=Signedness.SIGNED)]),
187
+ base_config=default_config_input16)
186
188
 
187
189
  # Create a QuantizationConfigOptions for quantizing constants in functional ops.
188
190
  # Constant configuration is similar to the default eight bit configuration except for PoT
@@ -193,7 +195,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
193
195
  default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
194
196
  enable_weights_quantization=True, weights_per_channel_threshold=True,
195
197
  weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
196
- const_configuration_options = schema.QuantizationConfigOptions([const_config])
198
+ const_configuration_options = schema.QuantizationConfigOptions(tuple([const_config]))
197
199
 
198
200
  # 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that
199
201
  # support 16 bit as input and output.
@@ -201,9 +203,9 @@ def generate_tp_model(default_config: OpQuantizationConfig,
201
203
  supported_input_activation_n_bits=(8, 16))
202
204
  const_config_input16_output16 = const_config_input16.clone_and_edit(
203
205
  activation_n_bits=16, signedness=Signedness.SIGNED)
204
- const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16,
205
- const_config_input16],
206
- base_config=const_config_input16)
206
+ const_configuration_options_inout16 = schema.QuantizationConfigOptions(tuple([const_config_input16_output16,
207
+ const_config_input16]),
208
+ base_config=const_config_input16)
207
209
 
208
210
  const_config_input16_per_tensor = const_config.clone_and_edit(
209
211
  supported_input_activation_n_bits=(8, 16),
@@ -213,20 +215,91 @@ def generate_tp_model(default_config: OpQuantizationConfig,
213
215
  )
214
216
  const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit(
215
217
  activation_n_bits=16, signedness=Signedness.SIGNED)
216
- const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(
218
+ const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(tuple(
217
219
  [const_config_input16_output16_per_tensor,
218
- const_config_input16_per_tensor],
220
+ const_config_input16_per_tensor]),
219
221
  base_config=const_config_input16_per_tensor)
220
222
 
221
223
  qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False,
222
224
  quantization_preserving=True,
223
225
  default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit(
224
226
  weights_per_channel_threshold=False))
225
- qpreserving_const_config_options = schema.QuantizationConfigOptions([qpreserving_const_config])
227
+ qpreserving_const_config_options = schema.QuantizationConfigOptions(tuple([qpreserving_const_config]))
226
228
 
227
229
  mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)
228
230
  for mp_cfg in mixed_precision_cfg_list]
229
231
 
232
+ # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
233
+ mixed_precision_configuration_options = schema.QuantizationConfigOptions(tuple(
234
+ mixed_precision_cfg_list + mp_cfg_list_16bit),
235
+ base_config=base_config)
236
+
237
+ # Create an OperatorsSet to represent a set of operations.
238
+ # Each OperatorsSet has a unique label.
239
+ # If a quantization configuration options is passed, these options will
240
+ # be used for operations that will be attached to this set's label.
241
+ # Otherwise, it will be a configure-less set (used in fusing):
242
+ operator_set = []
243
+ fusing_patterns = []
244
+ # May suit for operations like: Dropout, Reshape, etc.
245
+ operator_set.append(schema.OperatorsSet(OPSET_NO_QUANTIZATION,
246
+ default_configuration_options.clone_and_edit(
247
+ enable_activation_quantization=False)
248
+ .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
249
+ operator_set.append(schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
250
+ default_configuration_options.clone_and_edit(
251
+ enable_activation_quantization=False,
252
+ quantization_preserving=True)
253
+ .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
254
+ operator_set.append(
255
+ schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options))
256
+ operator_set.append(schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
257
+ default_configuration_options.clone_and_edit(
258
+ enable_activation_quantization=False,
259
+ quantization_preserving=True,
260
+ supported_input_activation_n_bits=(8, 16))
261
+ .clone_and_edit_weight_attribute(enable_weights_quantization=False)))
262
+ operator_set.append(schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor))
263
+
264
+ # Define operator sets that use mixed_precision_configuration_options:
265
+ conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
266
+ fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
267
+
268
+ operator_set.append(schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit))
269
+
270
+ # Note: Operations sets without quantization configuration are useful for creating fusing patterns
271
+ any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
272
+ add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
273
+ sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
274
+ mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
275
+ div = schema.OperatorsSet(OPSET_DIV, const_configuration_options)
276
+ min_max = schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
277
+ prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
278
+ swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
279
+ sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
280
+ tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit)
281
+ gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit)
282
+ hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
283
+ hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
284
+
285
+ operator_set.extend(
286
+ [conv, fc, any_relu, add, sub, mul, div, prelu, swish, sigmoid, tanh, min_max, gelu, hardsigmoid, hardswish])
287
+ # Combine multiple operators into a single operator to avoid quantization between
288
+ # them. To do this we define fusing patterns using the OperatorsSets that were created.
289
+ # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
290
+ activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid,
291
+ tanh, gelu, hardswish, hardsigmoid])
292
+ activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid, tanh, gelu,
293
+ hardswish, hardsigmoid])
294
+ any_binary = schema.OperatorSetConcat([add, sub, mul, div])
295
+
296
+ # ------------------- #
297
+ # Fusions
298
+ # ------------------- #
299
+ fusing_patterns.append(schema.Fusing((conv, activations_after_conv_to_fuse)))
300
+ fusing_patterns.append(schema.Fusing((fc, activations_after_fc_to_fuse)))
301
+ fusing_patterns.append(schema.Fusing((any_binary, any_relu)))
302
+
230
303
  # Create a TargetPlatformModel and set its default quantization config.
231
304
  # This default configuration will be used for all operations
232
305
  # unless specified otherwise (see OperatorsSet, for example):
@@ -235,76 +308,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
235
308
  tpc_minor_version=4,
236
309
  tpc_patch_version=0,
237
310
  tpc_platform_type=IMX500_TP_MODEL,
311
+ operator_set=tuple(operator_set),
312
+ fusing_patterns=tuple(fusing_patterns),
238
313
  add_metadata=True,
239
314
  name=name,
240
315
  is_simd_padding=True)
241
316
 
242
- # To start defining the model's components (such as operator sets, and fusing patterns),
243
- # use 'with' the TargetPlatformModel instance, and create them as below:
244
- with generated_tpm:
245
- # Create an OperatorsSet to represent a set of operations.
246
- # Each OperatorsSet has a unique label.
247
- # If a quantization configuration options is passed, these options will
248
- # be used for operations that will be attached to this set's label.
249
- # Otherwise, it will be a configure-less set (used in fusing):
250
-
251
- # May suit for operations like: Dropout, Reshape, etc.
252
- default_qco = tp.get_default_quantization_config_options()
253
- schema.OperatorsSet(OPSET_NO_QUANTIZATION,
254
- default_qco.clone_and_edit(enable_activation_quantization=False)
255
- .clone_and_edit_weight_attribute(enable_weights_quantization=False))
256
- schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
257
- default_qco.clone_and_edit(enable_activation_quantization=False,
258
- quantization_preserving=True)
259
- .clone_and_edit_weight_attribute(enable_weights_quantization=False))
260
- schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options)
261
- schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
262
- default_qco.clone_and_edit(enable_activation_quantization=False,
263
- quantization_preserving=True,
264
- supported_input_activation_n_bits=(8, 16))
265
- .clone_and_edit_weight_attribute(enable_weights_quantization=False))
266
- schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor)
267
-
268
- # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
269
- mixed_precision_configuration_options = schema.QuantizationConfigOptions(
270
- mixed_precision_cfg_list + mp_cfg_list_16bit,
271
- base_config=base_config)
272
-
273
- # Define operator sets that use mixed_precision_configuration_options:
274
- conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
275
- fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
276
-
277
- schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit)
278
-
279
- # Note: Operations sets without quantization configuration are useful for creating fusing patterns
280
- any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
281
- add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
282
- sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
283
- mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
284
- div = schema.OperatorsSet(OPSET_DIV, const_configuration_options)
285
- schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
286
- prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
287
- swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
288
- sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
289
- tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit)
290
- gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit)
291
- hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
292
- hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
293
-
294
- # Combine multiple operators into a single operator to avoid quantization between
295
- # them. To do this we define fusing patterns using the OperatorsSets that were created.
296
- # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
297
- activations_after_conv_to_fuse = schema.OperatorSetConcat([any_relu, swish, prelu, sigmoid,
298
- tanh, gelu, hardswish, hardsigmoid])
299
- activations_after_fc_to_fuse = schema.OperatorSetConcat([any_relu, swish, sigmoid, tanh, gelu,
300
- hardswish, hardsigmoid])
301
- any_binary = schema.OperatorSetConcat([add, sub, mul, div])
302
-
303
- # ------------------- #
304
- # Fusions
305
- # ------------------- #
306
- schema.Fusing([conv, activations_after_conv_to_fuse])
307
- schema.Fusing([fc, activations_after_fc_to_fuse])
308
- schema.Fusing([any_binary, any_relu])
309
-
310
317
  return generated_tpm
@@ -18,7 +18,8 @@ import model_compression_toolkit as mct
18
18
  import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
19
19
  from model_compression_toolkit.constants import FLOAT_BITWIDTH
20
20
  from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, QNNPACK_TP_MODEL
21
- from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \
21
+ from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, \
22
+ Signedness, \
22
23
  AttributeQuantizationConfig, OpQuantizationConfig
23
24
 
24
25
  tp = mct.target_platform
@@ -138,8 +139,28 @@ def generate_tp_model(default_config: OpQuantizationConfig,
138
139
  # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
139
140
  # If the QuantizationConfigOptions contains only one configuration,
140
141
  # this configuration will be used for the operation quantization:
141
- default_configuration_options = schema.QuantizationConfigOptions([default_config])
142
-
142
+ default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
143
+
144
+ # Combine operations/modules into a single module.
145
+ # Pytorch supports the next fusing patterns:
146
+ # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]
147
+ # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode
148
+ operator_set = []
149
+ fusing_patterns = []
150
+
151
+ conv = schema.OperatorsSet("Conv")
152
+ batchnorm = schema.OperatorsSet("BatchNorm")
153
+ relu = schema.OperatorsSet("Relu")
154
+ linear = schema.OperatorsSet("Linear")
155
+
156
+ operator_set.extend([conv, batchnorm, relu, linear])
157
+ # ------------------- #
158
+ # Fusions
159
+ # ------------------- #
160
+ fusing_patterns.append(schema.Fusing((conv, batchnorm, relu)))
161
+ fusing_patterns.append(schema.Fusing((conv, batchnorm)))
162
+ fusing_patterns.append(schema.Fusing((conv, relu)))
163
+ fusing_patterns.append(schema.Fusing((linear, relu)))
143
164
  # Create a TargetPlatformModel and set its default quantization config.
144
165
  # This default configuration will be used for all operations
145
166
  # unless specified otherwise (see OperatorsSet, for example):
@@ -148,27 +169,8 @@ def generate_tp_model(default_config: OpQuantizationConfig,
148
169
  tpc_minor_version=1,
149
170
  tpc_patch_version=0,
150
171
  tpc_platform_type=QNNPACK_TP_MODEL,
172
+ operator_set=tuple(operator_set),
173
+ fusing_patterns=tuple(fusing_patterns),
151
174
  add_metadata=False,
152
175
  name=name)
153
-
154
- # To start defining the model's components (such as operator sets, and fusing patterns),
155
- # use 'with' the target platform model instance, and create them as below:
156
- with generated_tpc:
157
- # Combine operations/modules into a single module.
158
- # Pytorch supports the next fusing patterns:
159
- # [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]
160
- # Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode
161
- conv = schema.OperatorsSet("Conv")
162
- batchnorm = schema.OperatorsSet("BatchNorm")
163
- relu = schema.OperatorsSet("Relu")
164
- linear = schema.OperatorsSet("Linear")
165
-
166
- # ------------------- #
167
- # Fusions
168
- # ------------------- #
169
- schema.Fusing([conv, batchnorm, relu])
170
- schema.Fusing([conv, batchnorm])
171
- schema.Fusing([conv, relu])
172
- schema.Fusing([linear, relu])
173
-
174
176
  return generated_tpc
@@ -136,7 +136,61 @@ def generate_tp_model(default_config: OpQuantizationConfig,
136
136
  # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
137
137
  # If the QuantizationConfigOptions contains only one configuration,
138
138
  # this configuration will be used for the operation quantization:
139
- default_configuration_options = schema.QuantizationConfigOptions([default_config])
139
+ default_configuration_options = schema.QuantizationConfigOptions(tuple([default_config]))
140
+
141
+ # In TFLite, the quantized operator specifications constraint operators quantization
142
+ # differently. For more details:
143
+ # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications
144
+ operator_set = []
145
+ fusing_patterns = []
146
+
147
+ operator_set.append(schema.OperatorsSet("NoQuantization",
148
+ default_configuration_options.clone_and_edit(
149
+ quantization_preserving=True)))
150
+
151
+ fc = schema.OperatorsSet("FullyConnected",
152
+ default_configuration_options.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))
153
+
154
+ operator_set.append(schema.OperatorsSet("L2Normalization",
155
+ default_configuration_options.clone_and_edit(
156
+ fixed_zero_point=0, fixed_scale=1 / 128)))
157
+ operator_set.append(schema.OperatorsSet("LogSoftmax",
158
+ default_configuration_options.clone_and_edit(
159
+ fixed_zero_point=127, fixed_scale=16 / 256)))
160
+ operator_set.append(schema.OperatorsSet("Tanh",
161
+ default_configuration_options.clone_and_edit(
162
+ fixed_zero_point=0, fixed_scale=1 / 128)))
163
+ operator_set.append(schema.OperatorsSet("Softmax",
164
+ default_configuration_options.clone_and_edit(
165
+ fixed_zero_point=-128, fixed_scale=1 / 256)))
166
+ operator_set.append(schema.OperatorsSet("Logistic",
167
+ default_configuration_options.clone_and_edit(
168
+ fixed_zero_point=-128, fixed_scale=1 / 256)))
169
+
170
+ conv2d = schema.OperatorsSet("Conv2d")
171
+ kernel = schema.OperatorSetConcat([conv2d, fc])
172
+
173
+ relu = schema.OperatorsSet("Relu")
174
+ elu = schema.OperatorsSet("Elu")
175
+ activations_to_fuse = schema.OperatorSetConcat([relu, elu])
176
+
177
+ batch_norm = schema.OperatorsSet("BatchNorm")
178
+ bias_add = schema.OperatorsSet("BiasAdd")
179
+ add = schema.OperatorsSet("Add")
180
+ squeeze = schema.OperatorsSet("Squeeze",
181
+ qc_options=default_configuration_options.clone_and_edit(
182
+ quantization_preserving=True))
183
+ operator_set.extend([fc, conv2d, kernel, relu, elu, batch_norm, bias_add, add, squeeze])
184
+ # ------------------- #
185
+ # Fusions
186
+ # ------------------- #
187
+ # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper
188
+ fusing_patterns.append(schema.Fusing((kernel, bias_add)))
189
+ fusing_patterns.append(schema.Fusing((kernel, bias_add, activations_to_fuse)))
190
+ fusing_patterns.append(schema.Fusing((conv2d, batch_norm, activations_to_fuse)))
191
+ fusing_patterns.append(schema.Fusing((conv2d, squeeze, activations_to_fuse)))
192
+ fusing_patterns.append(schema.Fusing((batch_norm, activations_to_fuse)))
193
+ fusing_patterns.append(schema.Fusing((batch_norm, add, activations_to_fuse)))
140
194
 
141
195
  # Create a TargetPlatformModel and set its default quantization config.
142
196
  # This default configuration will be used for all operations
@@ -145,62 +199,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
145
199
  default_configuration_options,
146
200
  tpc_minor_version=1,
147
201
  tpc_patch_version=0,
202
+ operator_set=tuple(operator_set),
203
+ fusing_patterns=tuple(fusing_patterns),
148
204
  tpc_platform_type=TFLITE_TP_MODEL,
149
205
  add_metadata=False,
150
206
  name=name)
151
207
 
152
- # To start defining the model's components (such as operator sets, and fusing patterns),
153
- # use 'with' the TargetPlatformModel instance, and create them as below:
154
- with generated_tpc:
155
- # In TFLite, the quantized operator specifications constraint operators quantization
156
- # differently. For more details:
157
- # https://www.tensorflow.org/lite/performance/quantization_spec#int8_quantized_operator_specifications
158
- schema.OperatorsSet("NoQuantization",
159
- tp.get_default_quantization_config_options().clone_and_edit(
160
- quantization_preserving=True))
161
-
162
- fc_qco = tp.get_default_quantization_config_options()
163
- fc = schema.OperatorsSet("FullyConnected",
164
- fc_qco.clone_and_edit_weight_attribute(weights_per_channel_threshold=False))
165
-
166
- schema.OperatorsSet("L2Normalization",
167
- tp.get_default_quantization_config_options().clone_and_edit(
168
- fixed_zero_point=0, fixed_scale=1 / 128))
169
- schema.OperatorsSet("LogSoftmax",
170
- tp.get_default_quantization_config_options().clone_and_edit(
171
- fixed_zero_point=127, fixed_scale=16 / 256))
172
- schema.OperatorsSet("Tanh",
173
- tp.get_default_quantization_config_options().clone_and_edit(
174
- fixed_zero_point=0, fixed_scale=1 / 128))
175
- schema.OperatorsSet("Softmax",
176
- tp.get_default_quantization_config_options().clone_and_edit(
177
- fixed_zero_point=-128, fixed_scale=1 / 256))
178
- schema.OperatorsSet("Logistic",
179
- tp.get_default_quantization_config_options().clone_and_edit(
180
- fixed_zero_point=-128, fixed_scale=1 / 256))
181
-
182
- conv2d = schema.OperatorsSet("Conv2d")
183
- kernel = schema.OperatorSetConcat([conv2d, fc])
184
-
185
- relu = schema.OperatorsSet("Relu")
186
- elu = schema.OperatorsSet("Elu")
187
- activations_to_fuse = schema.OperatorSetConcat([relu, elu])
188
-
189
- batch_norm = schema.OperatorsSet("BatchNorm")
190
- bias_add = schema.OperatorsSet("BiasAdd")
191
- add = schema.OperatorsSet("Add")
192
- squeeze = schema.OperatorsSet("Squeeze",
193
- qc_options=tp.get_default_quantization_config_options().clone_and_edit(
194
- quantization_preserving=True))
195
- # ------------------- #
196
- # Fusions
197
- # ------------------- #
198
- # Source: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/grappler/optimizers/remapper
199
- schema.Fusing([kernel, bias_add])
200
- schema.Fusing([kernel, bias_add, activations_to_fuse])
201
- schema.Fusing([conv2d, batch_norm, activations_to_fuse])
202
- schema.Fusing([conv2d, squeeze, activations_to_fuse])
203
- schema.Fusing([batch_norm, activations_to_fuse])
204
- schema.Fusing([batch_norm, add, activations_to_fuse])
205
-
206
208
  return generated_tpc
@@ -1,67 +0,0 @@
1
- # Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
-
16
- from model_compression_toolkit.logger import Logger
17
-
18
- def get_current_tp_model():
19
- """
20
-
21
- Returns: The current TargetPlatformModel that is being used and accessed.
22
-
23
- """
24
- return _current_tp_model.get()
25
-
26
-
27
- class CurrentTPModel:
28
- """
29
- Wrapper of the current TargetPlatformModel object that is being accessed and defined.
30
- """
31
-
32
- def __init__(self):
33
- super(CurrentTPModel, self).__init__()
34
- self.tp_model = None
35
-
36
- def get(self):
37
- """
38
-
39
- Returns: The current TargetPlatformModel that is being defined.
40
-
41
- """
42
- if self.tp_model is None:
43
- Logger.critical('Target platform model is not initialized.') # pragma: no cover
44
- return self.tp_model
45
-
46
- def reset(self):
47
- """
48
-
49
- Reset the current TargetPlatformModel so a new TargetPlatformModel can be wrapped and
50
- used as the current TargetPlatformModel object.
51
-
52
- """
53
- self.tp_model = None
54
-
55
- def set(self, tp_model):
56
- """
57
- Set and wrap a TargetPlatformModel as the current TargetPlatformModel.
58
-
59
- Args:
60
- tp_model: TargetPlatformModel to set as the current TargetPlatformModel to access and use.
61
-
62
- """
63
- self.tp_model = tp_model
64
-
65
-
66
- # Use a single instance for the current model.
67
- _current_tp_model = CurrentTPModel()