mct-nightly 2.2.0.20241201.617__py3-none-any.whl → 2.2.0.20241202.131715__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241201.617.dist-info → mct_nightly-2.2.0.20241202.131715.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241201.617.dist-info → mct_nightly-2.2.0.20241202.131715.dist-info}/RECORD +58 -58
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +0 -3
- model_compression_toolkit/core/common/graph/base_node.py +7 -5
- model_compression_toolkit/core/common/graph/functional_node.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +2 -2
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +2 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +1 -1
- model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +1 -1
- model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +2 -2
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +2 -2
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -1
- model_compression_toolkit/metadata.py +14 -5
- model_compression_toolkit/target_platform_capabilities/schema/__init__.py +14 -0
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +11 -0
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +37 -0
- model_compression_toolkit/target_platform_capabilities/{target_platform/op_quantization_config.py → schema/v1.py} +377 -24
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +3 -5
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -214
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +1 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +6 -10
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +39 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +3 -5
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +36 -31
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +37 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +39 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +36 -31
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +45 -38
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +37 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +70 -62
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +3 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +22 -17
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +56 -51
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +3 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +3 -4
- model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +0 -85
- model_compression_toolkit/target_platform_capabilities/target_platform/operators.py +0 -87
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model_component.py +0 -40
- {mct_nightly-2.2.0.20241201.617.dist-info → mct_nightly-2.2.0.20241202.131715.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241201.617.dist-info → mct_nightly-2.2.0.20241202.131715.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241201.617.dist-info → mct_nightly-2.2.0.20241202.131715.dist-info}/top_level.txt +0 -0
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py
CHANGED
@@ -15,13 +15,12 @@
|
|
15
15
|
from typing import List, Tuple
|
16
16
|
|
17
17
|
import model_compression_toolkit as mct
|
18
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
18
19
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
20
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
|
20
|
-
WEIGHTS_QUANTIZATION_METHOD
|
21
|
-
from model_compression_toolkit.target_platform_capabilities.
|
22
|
-
|
23
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
24
|
-
AttributeQuantizationConfig
|
21
|
+
WEIGHTS_QUANTIZATION_METHOD, IMX500_TP_MODEL
|
22
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \
|
23
|
+
AttributeQuantizationConfig, OpQuantizationConfig
|
25
24
|
|
26
25
|
tp = mct.target_platform
|
27
26
|
|
@@ -86,7 +85,7 @@ def get_op_quantization_configs() -> \
|
|
86
85
|
|
87
86
|
# We define a default config for operation without kernel attribute.
|
88
87
|
# This is the default config that should be used for non-linear operations.
|
89
|
-
eight_bits_default =
|
88
|
+
eight_bits_default = schema.OpQuantizationConfig(
|
90
89
|
default_weight_attr_config=default_weight_attr_config,
|
91
90
|
attr_weights_configs_mapping={},
|
92
91
|
activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
|
@@ -100,7 +99,7 @@ def get_op_quantization_configs() -> \
|
|
100
99
|
signedness=Signedness.AUTO)
|
101
100
|
|
102
101
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
103
|
-
linear_eight_bits =
|
102
|
+
linear_eight_bits = schema.OpQuantizationConfig(
|
104
103
|
default_weight_attr_config=default_weight_attr_config,
|
105
104
|
attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
|
106
105
|
activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
|
@@ -153,7 +152,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
153
152
|
# of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
|
154
153
|
# If the QuantizationConfigOptions contains only one configuration,
|
155
154
|
# this configuration will be used for the operation quantization:
|
156
|
-
default_configuration_options =
|
155
|
+
default_configuration_options = schema.QuantizationConfigOptions([default_config])
|
157
156
|
|
158
157
|
# Create a QuantizationConfigOptions for quantizing constants in functional ops.
|
159
158
|
# Constant configuration is similar to the default eight bit configuration except for PoT
|
@@ -164,12 +163,18 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
164
163
|
default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
|
165
164
|
enable_weights_quantization=True, weights_per_channel_threshold=True,
|
166
165
|
weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
|
167
|
-
const_configuration_options =
|
166
|
+
const_configuration_options = schema.QuantizationConfigOptions([const_config])
|
168
167
|
|
169
168
|
# Create a TargetPlatformModel and set its default quantization config.
|
170
169
|
# This default configuration will be used for all operations
|
171
170
|
# unless specified otherwise (see OperatorsSet, for example):
|
172
|
-
generated_tpm =
|
171
|
+
generated_tpm = schema.TargetPlatformModel(
|
172
|
+
default_configuration_options,
|
173
|
+
tpc_minor_version=3,
|
174
|
+
tpc_patch_version=0,
|
175
|
+
tpc_platform_type=IMX500_TP_MODEL,
|
176
|
+
add_metadata=True,
|
177
|
+
name=name)
|
173
178
|
|
174
179
|
# To start defining the model's components (such as operator sets, and fusing patterns),
|
175
180
|
# use 'with' the TargetPlatformModel instance, and create them as below:
|
@@ -182,42 +187,42 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
182
187
|
|
183
188
|
# May suit for operations like: Dropout, Reshape, etc.
|
184
189
|
default_qco = tp.get_default_quantization_config_options()
|
185
|
-
|
186
|
-
|
187
|
-
|
190
|
+
schema.OperatorsSet("NoQuantization",
|
191
|
+
default_qco.clone_and_edit(enable_activation_quantization=False)
|
192
|
+
.clone_and_edit_weight_attribute(enable_weights_quantization=False))
|
188
193
|
|
189
194
|
# Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
|
190
|
-
mixed_precision_configuration_options =
|
191
|
-
|
195
|
+
mixed_precision_configuration_options = schema.QuantizationConfigOptions(mixed_precision_cfg_list,
|
196
|
+
base_config=base_config)
|
192
197
|
|
193
198
|
# Define operator sets that use mixed_precision_configuration_options:
|
194
|
-
conv =
|
195
|
-
fc =
|
199
|
+
conv = schema.OperatorsSet("Conv", mixed_precision_configuration_options)
|
200
|
+
fc = schema.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
|
196
201
|
|
197
202
|
# Define operations sets without quantization configuration
|
198
203
|
# options (useful for creating fusing patterns, for example):
|
199
|
-
any_relu =
|
200
|
-
add =
|
201
|
-
sub =
|
202
|
-
mul =
|
203
|
-
div =
|
204
|
-
prelu =
|
205
|
-
swish =
|
206
|
-
sigmoid =
|
207
|
-
tanh =
|
204
|
+
any_relu = schema.OperatorsSet("AnyReLU")
|
205
|
+
add = schema.OperatorsSet("Add", const_configuration_options)
|
206
|
+
sub = schema.OperatorsSet("Sub", const_configuration_options)
|
207
|
+
mul = schema.OperatorsSet("Mul", const_configuration_options)
|
208
|
+
div = schema.OperatorsSet("Div", const_configuration_options)
|
209
|
+
prelu = schema.OperatorsSet("PReLU")
|
210
|
+
swish = schema.OperatorsSet("Swish")
|
211
|
+
sigmoid = schema.OperatorsSet("Sigmoid")
|
212
|
+
tanh = schema.OperatorsSet("Tanh")
|
208
213
|
|
209
214
|
# Combine multiple operators into a single operator to avoid quantization between
|
210
215
|
# them. To do this we define fusing patterns using the OperatorsSets that were created.
|
211
216
|
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created
|
212
|
-
activations_after_conv_to_fuse =
|
213
|
-
activations_after_fc_to_fuse =
|
214
|
-
any_binary =
|
217
|
+
activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
|
218
|
+
activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid)
|
219
|
+
any_binary = schema.OperatorSetConcat(add, sub, mul, div)
|
215
220
|
|
216
221
|
# ------------------- #
|
217
222
|
# Fusions
|
218
223
|
# ------------------- #
|
219
|
-
|
220
|
-
|
221
|
-
|
224
|
+
schema.Fusing([conv, activations_after_conv_to_fuse])
|
225
|
+
schema.Fusing([fc, activations_after_fc_to_fuse])
|
226
|
+
schema.Fusing([any_binary, any_relu])
|
222
227
|
|
223
228
|
return generated_tpm
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py
CHANGED
@@ -15,6 +15,7 @@
|
|
15
15
|
import tensorflow as tf
|
16
16
|
from packaging import version
|
17
17
|
|
18
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
|
18
19
|
from model_compression_toolkit.defaultdict import DefaultDict
|
19
20
|
from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
|
20
21
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
|
@@ -48,7 +49,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
|
|
48
49
|
return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model)
|
49
50
|
|
50
51
|
|
51
|
-
def generate_keras_tpc(name: str, tp_model:
|
52
|
+
def generate_keras_tpc(name: str, tp_model: TargetPlatformModel):
|
52
53
|
"""
|
53
54
|
Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
|
54
55
|
|
@@ -59,7 +60,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
59
60
|
Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
|
60
61
|
"""
|
61
62
|
|
62
|
-
keras_tpc = tp.TargetPlatformCapabilities(tp_model
|
63
|
+
keras_tpc = tp.TargetPlatformCapabilities(tp_model)
|
63
64
|
|
64
65
|
no_quant_list = [Identity,
|
65
66
|
tf.identity,
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py
CHANGED
@@ -23,6 +23,7 @@ from torch.nn import Dropout, Flatten, Hardtanh, Identity
|
|
23
23
|
from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
|
24
24
|
from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, leaky_relu
|
25
25
|
|
26
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel
|
26
27
|
from model_compression_toolkit.defaultdict import DefaultDict
|
27
28
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
|
28
29
|
BIAS
|
@@ -42,7 +43,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
|
|
42
43
|
return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model)
|
43
44
|
|
44
45
|
|
45
|
-
def generate_pytorch_tpc(name: str, tp_model:
|
46
|
+
def generate_pytorch_tpc(name: str, tp_model: TargetPlatformModel):
|
46
47
|
"""
|
47
48
|
Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
|
48
49
|
Args:
|
@@ -51,9 +52,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
51
52
|
Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
|
52
53
|
"""
|
53
54
|
|
54
|
-
pytorch_tpc = tp.TargetPlatformCapabilities(tp_model
|
55
|
-
name=name,
|
56
|
-
version=TPC_VERSION)
|
55
|
+
pytorch_tpc = tp.TargetPlatformCapabilities(tp_model)
|
57
56
|
|
58
57
|
# we provide attributes mapping that maps each layer type in the operations set
|
59
58
|
# that has weights attributes with provided quantization config (in the tp model) to
|
@@ -15,12 +15,12 @@
|
|
15
15
|
from typing import List, Tuple
|
16
16
|
|
17
17
|
import model_compression_toolkit as mct
|
18
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
18
19
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
|
-
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
AttributeQuantizationConfig
|
20
|
+
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
|
21
|
+
IMX500_TP_MODEL
|
22
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \
|
23
|
+
AttributeQuantizationConfig, OpQuantizationConfig
|
24
24
|
|
25
25
|
tp = mct.target_platform
|
26
26
|
|
@@ -112,7 +112,7 @@ def get_op_quantization_configs() -> \
|
|
112
112
|
|
113
113
|
# We define a default config for operation without kernel attribute.
|
114
114
|
# This is the default config that should be used for non-linear operations.
|
115
|
-
eight_bits_default =
|
115
|
+
eight_bits_default = OpQuantizationConfig(
|
116
116
|
default_weight_attr_config=default_weight_attr_config,
|
117
117
|
attr_weights_configs_mapping={},
|
118
118
|
activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
|
@@ -126,7 +126,7 @@ def get_op_quantization_configs() -> \
|
|
126
126
|
signedness=Signedness.AUTO)
|
127
127
|
|
128
128
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
129
|
-
linear_eight_bits =
|
129
|
+
linear_eight_bits = OpQuantizationConfig(
|
130
130
|
default_weight_attr_config=default_weight_attr_config,
|
131
131
|
attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
|
132
132
|
activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
|
@@ -176,12 +176,13 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
176
176
|
# of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
|
177
177
|
# If the QuantizationConfigOptions contains only one configuration,
|
178
178
|
# this configuration will be used for the operation quantization:
|
179
|
-
default_configuration_options =
|
179
|
+
default_configuration_options = schema.QuantizationConfigOptions([default_config])
|
180
180
|
default_config_input16 = default_config.clone_and_edit(supported_input_activation_n_bits=(8, 16))
|
181
|
-
default_config_options_16bit =
|
182
|
-
|
183
|
-
|
184
|
-
|
181
|
+
default_config_options_16bit = schema.QuantizationConfigOptions([default_config_input16,
|
182
|
+
default_config_input16.clone_and_edit(
|
183
|
+
activation_n_bits=16,
|
184
|
+
signedness=Signedness.SIGNED)],
|
185
|
+
base_config=default_config_input16)
|
185
186
|
|
186
187
|
# Create a QuantizationConfigOptions for quantizing constants in functional ops.
|
187
188
|
# Constant configuration is similar to the default eight bit configuration except for PoT
|
@@ -192,7 +193,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
192
193
|
default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
|
193
194
|
enable_weights_quantization=True, weights_per_channel_threshold=True,
|
194
195
|
weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
|
195
|
-
const_configuration_options =
|
196
|
+
const_configuration_options = schema.QuantizationConfigOptions([const_config])
|
196
197
|
|
197
198
|
# 16 bits inputs and outputs. Currently, only defined for consts since they are used in operators that
|
198
199
|
# support 16 bit as input and output.
|
@@ -200,27 +201,28 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
200
201
|
supported_input_activation_n_bits=(8, 16))
|
201
202
|
const_config_input16_output16 = const_config_input16.clone_and_edit(
|
202
203
|
activation_n_bits=16, signedness=Signedness.SIGNED)
|
203
|
-
const_configuration_options_inout16 =
|
204
|
-
|
205
|
-
|
204
|
+
const_configuration_options_inout16 = schema.QuantizationConfigOptions([const_config_input16_output16,
|
205
|
+
const_config_input16],
|
206
|
+
base_config=const_config_input16)
|
206
207
|
|
207
208
|
const_config_input16_per_tensor = const_config.clone_and_edit(
|
208
209
|
supported_input_activation_n_bits=(8, 16),
|
209
210
|
default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
|
210
|
-
enable_weights_quantization=True, weights_per_channel_threshold=
|
211
|
+
enable_weights_quantization=True, weights_per_channel_threshold=False,
|
211
212
|
weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO)
|
212
213
|
)
|
213
214
|
const_config_input16_output16_per_tensor = const_config_input16_per_tensor.clone_and_edit(
|
214
215
|
activation_n_bits=16, signedness=Signedness.SIGNED)
|
215
|
-
const_configuration_options_inout16_per_tensor =
|
216
|
-
|
217
|
-
|
216
|
+
const_configuration_options_inout16_per_tensor = schema.QuantizationConfigOptions(
|
217
|
+
[const_config_input16_output16_per_tensor,
|
218
|
+
const_config_input16_per_tensor],
|
219
|
+
base_config=const_config_input16_per_tensor)
|
218
220
|
|
219
221
|
qpreserving_const_config = const_config.clone_and_edit(enable_activation_quantization=False,
|
220
222
|
quantization_preserving=True,
|
221
223
|
default_weight_attr_config=const_config.default_weight_attr_config.clone_and_edit(
|
222
224
|
weights_per_channel_threshold=False))
|
223
|
-
qpreserving_const_config_options =
|
225
|
+
qpreserving_const_config_options = schema.QuantizationConfigOptions([qpreserving_const_config])
|
224
226
|
|
225
227
|
mp_cfg_list_16bit = [mp_cfg.clone_and_edit(activation_n_bits=16, signedness=Signedness.SIGNED)
|
226
228
|
for mp_cfg in mixed_precision_cfg_list]
|
@@ -228,7 +230,12 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
228
230
|
# Create a TargetPlatformModel and set its default quantization config.
|
229
231
|
# This default configuration will be used for all operations
|
230
232
|
# unless specified otherwise (see OperatorsSet, for example):
|
231
|
-
generated_tpm =
|
233
|
+
generated_tpm = schema.TargetPlatformModel(
|
234
|
+
default_configuration_options,
|
235
|
+
tpc_minor_version=4,
|
236
|
+
tpc_patch_version=0,
|
237
|
+
tpc_platform_type=IMX500_TP_MODEL,
|
238
|
+
add_metadata=True, name=name)
|
232
239
|
|
233
240
|
# To start defining the model's components (such as operator sets, and fusing patterns),
|
234
241
|
# use 'with' the TargetPlatformModel instance, and create them as below:
|
@@ -243,60 +250,61 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
243
250
|
|
244
251
|
# May suit for operations like: Dropout, Reshape, etc.
|
245
252
|
default_qco = tp.get_default_quantization_config_options()
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
253
|
+
schema.OperatorsSet(OPSET_NO_QUANTIZATION,
|
254
|
+
default_qco.clone_and_edit(enable_activation_quantization=False)
|
255
|
+
.clone_and_edit_weight_attribute(enable_weights_quantization=False))
|
256
|
+
schema.OperatorsSet(OPSET_QUANTIZATION_PRESERVING,
|
257
|
+
default_qco.clone_and_edit(enable_activation_quantization=False,
|
258
|
+
quantization_preserving=True)
|
259
|
+
.clone_and_edit_weight_attribute(enable_weights_quantization=False))
|
260
|
+
schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS_WITH_WEIGHTS, qpreserving_const_config_options)
|
261
|
+
schema.OperatorsSet(OPSET_DIMENSION_MANIPULATION_OPS,
|
262
|
+
default_qco.clone_and_edit(enable_activation_quantization=False,
|
263
|
+
quantization_preserving=True,
|
264
|
+
supported_input_activation_n_bits=(8, 16))
|
265
|
+
.clone_and_edit_weight_attribute(enable_weights_quantization=False))
|
266
|
+
schema.OperatorsSet(OPSET_MERGE_OPS, const_configuration_options_inout16_per_tensor)
|
260
267
|
|
261
268
|
# Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
|
262
|
-
mixed_precision_configuration_options =
|
263
|
-
|
269
|
+
mixed_precision_configuration_options = schema.QuantizationConfigOptions(
|
270
|
+
mixed_precision_cfg_list + mp_cfg_list_16bit,
|
271
|
+
base_config=base_config)
|
264
272
|
|
265
273
|
# Define operator sets that use mixed_precision_configuration_options:
|
266
|
-
conv =
|
267
|
-
fc =
|
274
|
+
conv = schema.OperatorsSet(OPSET_CONV, mixed_precision_configuration_options)
|
275
|
+
fc = schema.OperatorsSet(OPSET_FULLY_CONNECTED, mixed_precision_configuration_options)
|
268
276
|
|
269
|
-
|
277
|
+
schema.OperatorsSet(OPSET_BATCH_NORM, default_config_options_16bit)
|
270
278
|
|
271
279
|
# Note: Operations sets without quantization configuration are useful for creating fusing patterns
|
272
|
-
any_relu =
|
273
|
-
add =
|
274
|
-
sub =
|
275
|
-
mul =
|
276
|
-
div =
|
277
|
-
|
278
|
-
prelu =
|
279
|
-
swish =
|
280
|
-
sigmoid =
|
281
|
-
tanh =
|
282
|
-
gelu =
|
283
|
-
hardsigmoid =
|
284
|
-
hardswish =
|
280
|
+
any_relu = schema.OperatorsSet(OPSET_ANY_RELU, default_config_options_16bit)
|
281
|
+
add = schema.OperatorsSet(OPSET_ADD, const_configuration_options_inout16)
|
282
|
+
sub = schema.OperatorsSet(OPSET_SUB, const_configuration_options_inout16)
|
283
|
+
mul = schema.OperatorsSet(OPSET_MUL, const_configuration_options_inout16)
|
284
|
+
div = schema.OperatorsSet(OPSET_DIV, const_configuration_options)
|
285
|
+
schema.OperatorsSet(OPSET_MIN_MAX, const_configuration_options_inout16)
|
286
|
+
prelu = schema.OperatorsSet(OPSET_PRELU, default_config_options_16bit)
|
287
|
+
swish = schema.OperatorsSet(OPSET_SWISH, default_config_options_16bit)
|
288
|
+
sigmoid = schema.OperatorsSet(OPSET_SIGMOID, default_config_options_16bit)
|
289
|
+
tanh = schema.OperatorsSet(OPSET_TANH, default_config_options_16bit)
|
290
|
+
gelu = schema.OperatorsSet(OPSET_GELU, default_config_options_16bit)
|
291
|
+
hardsigmoid = schema.OperatorsSet(OPSET_HARDSIGMOID, default_config_options_16bit)
|
292
|
+
hardswish = schema.OperatorsSet(OPSET_HARDSWISH, default_config_options_16bit)
|
285
293
|
|
286
294
|
# Combine multiple operators into a single operator to avoid quantization between
|
287
295
|
# them. To do this we define fusing patterns using the OperatorsSets that were created.
|
288
296
|
# To group multiple sets with regard to fusing, an OperatorSetConcat can be created
|
289
|
-
activations_after_conv_to_fuse =
|
290
|
-
|
291
|
-
activations_after_fc_to_fuse =
|
292
|
-
|
293
|
-
any_binary =
|
297
|
+
activations_after_conv_to_fuse = schema.OperatorSetConcat(any_relu, swish, prelu, sigmoid,
|
298
|
+
tanh, gelu, hardswish, hardsigmoid)
|
299
|
+
activations_after_fc_to_fuse = schema.OperatorSetConcat(any_relu, swish, sigmoid, tanh, gelu,
|
300
|
+
hardswish, hardsigmoid)
|
301
|
+
any_binary = schema.OperatorSetConcat(add, sub, mul, div)
|
294
302
|
|
295
303
|
# ------------------- #
|
296
304
|
# Fusions
|
297
305
|
# ------------------- #
|
298
|
-
|
299
|
-
|
300
|
-
|
306
|
+
schema.Fusing([conv, activations_after_conv_to_fuse])
|
307
|
+
schema.Fusing([fc, activations_after_fc_to_fuse])
|
308
|
+
schema.Fusing([any_binary, any_relu])
|
301
309
|
|
302
310
|
return generated_tpm
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py
CHANGED
@@ -15,6 +15,7 @@
|
|
15
15
|
import tensorflow as tf
|
16
16
|
from packaging import version
|
17
17
|
|
18
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
18
19
|
from model_compression_toolkit.defaultdict import DefaultDict
|
19
20
|
from model_compression_toolkit.verify_packages import FOUND_SONY_CUSTOM_LAYERS
|
20
21
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_DEPTHWISE_KERNEL, \
|
@@ -53,7 +54,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
|
|
53
54
|
return generate_keras_tpc(name='imx500_tpc_keras_tpc', tp_model=imx500_tpc_tp_model)
|
54
55
|
|
55
56
|
|
56
|
-
def generate_keras_tpc(name: str, tp_model:
|
57
|
+
def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
57
58
|
"""
|
58
59
|
Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
|
59
60
|
|
@@ -64,7 +65,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
64
65
|
Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
|
65
66
|
"""
|
66
67
|
|
67
|
-
keras_tpc = tp.TargetPlatformCapabilities(tp_model
|
68
|
+
keras_tpc = tp.TargetPlatformCapabilities(tp_model)
|
68
69
|
|
69
70
|
no_quant_list = [tf.quantization.fake_quant_with_min_max_vars,
|
70
71
|
tf.math.argmax,
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py
CHANGED
@@ -25,6 +25,7 @@ from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, Hardsig
|
|
25
25
|
import torch.nn.functional as F
|
26
26
|
from torch.nn.functional import relu, relu6, prelu, silu, hardtanh, hardswish, hardsigmoid, leaky_relu, gelu
|
27
27
|
|
28
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
28
29
|
from model_compression_toolkit.defaultdict import DefaultDict
|
29
30
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, PYTORCH_KERNEL, \
|
30
31
|
BIAS
|
@@ -50,7 +51,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
|
|
50
51
|
return generate_pytorch_tpc(name='imx500_tpc_pytorch_tpc', tp_model=imx500_tpc_tp_model)
|
51
52
|
|
52
53
|
|
53
|
-
def generate_pytorch_tpc(name: str, tp_model:
|
54
|
+
def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
54
55
|
"""
|
55
56
|
Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
|
56
57
|
Args:
|
@@ -59,9 +60,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
59
60
|
Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
|
60
61
|
"""
|
61
62
|
|
62
|
-
pytorch_tpc = tp.TargetPlatformCapabilities(tp_model
|
63
|
-
name=name,
|
64
|
-
version=TPC_VERSION)
|
63
|
+
pytorch_tpc = tp.TargetPlatformCapabilities(tp_model)
|
65
64
|
|
66
65
|
# we provide attributes mapping that maps each layer type in the operations set
|
67
66
|
# that has weights attributes with provided quantization config (in the tp model) to
|
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py
CHANGED
@@ -15,12 +15,11 @@
|
|
15
15
|
from typing import List, Tuple
|
16
16
|
|
17
17
|
import model_compression_toolkit as mct
|
18
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
18
19
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
|
-
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR
|
20
|
-
from model_compression_toolkit.target_platform_capabilities.
|
21
|
-
|
22
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
|
-
AttributeQuantizationConfig
|
20
|
+
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, QNNPACK_TP_MODEL
|
21
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, Signedness, \
|
22
|
+
AttributeQuantizationConfig, OpQuantizationConfig
|
24
23
|
|
25
24
|
tp = mct.target_platform
|
26
25
|
|
@@ -85,7 +84,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
85
84
|
|
86
85
|
# We define a default config for operation without kernel attribute.
|
87
86
|
# This is the default config that should be used for non-linear operations.
|
88
|
-
eight_bits_default =
|
87
|
+
eight_bits_default = schema.OpQuantizationConfig(
|
89
88
|
default_weight_attr_config=default_weight_attr_config,
|
90
89
|
attr_weights_configs_mapping={},
|
91
90
|
activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
|
@@ -99,7 +98,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
99
98
|
signedness=Signedness.AUTO)
|
100
99
|
|
101
100
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
102
|
-
linear_eight_bits =
|
101
|
+
linear_eight_bits = schema.OpQuantizationConfig(
|
103
102
|
activation_quantization_method=tp.QuantizationMethod.UNIFORM,
|
104
103
|
default_weight_attr_config=default_weight_attr_config,
|
105
104
|
attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
|
@@ -139,12 +138,18 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
139
138
|
# of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
|
140
139
|
# If the QuantizationConfigOptions contains only one configuration,
|
141
140
|
# this configuration will be used for the operation quantization:
|
142
|
-
default_configuration_options =
|
141
|
+
default_configuration_options = schema.QuantizationConfigOptions([default_config])
|
143
142
|
|
144
143
|
# Create a TargetPlatformModel and set its default quantization config.
|
145
144
|
# This default configuration will be used for all operations
|
146
145
|
# unless specified otherwise (see OperatorsSet, for example):
|
147
|
-
generated_tpc =
|
146
|
+
generated_tpc = schema.TargetPlatformModel(
|
147
|
+
default_configuration_options,
|
148
|
+
tpc_minor_version=1,
|
149
|
+
tpc_patch_version=0,
|
150
|
+
tpc_platform_type=QNNPACK_TP_MODEL,
|
151
|
+
add_metadata=False,
|
152
|
+
name=name)
|
148
153
|
|
149
154
|
# To start defining the model's components (such as operator sets, and fusing patterns),
|
150
155
|
# use 'with' the target platform model instance, and create them as below:
|
@@ -153,17 +158,17 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
153
158
|
# Pytorch supports the next fusing patterns:
|
154
159
|
# [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]
|
155
160
|
# Source: # https://pytorch.org/docs/stable/quantization.html#model-preparation-for-quantization-eager-mode
|
156
|
-
conv =
|
157
|
-
batchnorm =
|
158
|
-
relu =
|
159
|
-
linear =
|
161
|
+
conv = schema.OperatorsSet("Conv")
|
162
|
+
batchnorm = schema.OperatorsSet("BatchNorm")
|
163
|
+
relu = schema.OperatorsSet("Relu")
|
164
|
+
linear = schema.OperatorsSet("Linear")
|
160
165
|
|
161
166
|
# ------------------- #
|
162
167
|
# Fusions
|
163
168
|
# ------------------- #
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
169
|
+
schema.Fusing([conv, batchnorm, relu])
|
170
|
+
schema.Fusing([conv, batchnorm])
|
171
|
+
schema.Fusing([conv, relu])
|
172
|
+
schema.Fusing([linear, relu])
|
168
173
|
|
169
174
|
return generated_tpc
|
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py
CHANGED
@@ -16,6 +16,7 @@ import tensorflow as tf
|
|
16
16
|
|
17
17
|
from packaging import version
|
18
18
|
|
19
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
19
20
|
from model_compression_toolkit.defaultdict import DefaultDict
|
20
21
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, KERAS_KERNEL, BIAS_ATTR, \
|
21
22
|
KERAS_DEPTHWISE_KERNEL, BIAS
|
@@ -41,7 +42,7 @@ def get_keras_tpc() -> tp.TargetPlatformCapabilities:
|
|
41
42
|
return generate_keras_tpc(name='qnnpack_keras', tp_model=qnnpack_tp_model)
|
42
43
|
|
43
44
|
|
44
|
-
def generate_keras_tpc(name: str, tp_model:
|
45
|
+
def generate_keras_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
45
46
|
"""
|
46
47
|
Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
|
47
48
|
|
@@ -52,9 +53,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
52
53
|
Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
|
53
54
|
"""
|
54
55
|
|
55
|
-
keras_tpc = tp.TargetPlatformCapabilities(tp_model
|
56
|
-
name=name,
|
57
|
-
version=TPC_VERSION)
|
56
|
+
keras_tpc = tp.TargetPlatformCapabilities(tp_model)
|
58
57
|
|
59
58
|
with keras_tpc:
|
60
59
|
tp.OperationsSetToLayers("Conv",
|
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py
CHANGED
@@ -16,6 +16,7 @@ import torch
|
|
16
16
|
from torch.nn import Conv2d, Linear, BatchNorm2d, ConvTranspose2d, Hardtanh, ReLU, ReLU6
|
17
17
|
from torch.nn.functional import relu, relu6, hardtanh
|
18
18
|
|
19
|
+
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
19
20
|
from model_compression_toolkit.defaultdict import DefaultDict
|
20
21
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, PYTORCH_KERNEL, BIAS_ATTR, \
|
21
22
|
BIAS
|
@@ -35,7 +36,7 @@ def get_pytorch_tpc() -> tp.TargetPlatformCapabilities:
|
|
35
36
|
return generate_pytorch_tpc(name='qnnpack_pytorch', tp_model=qnnpack_pytorch)
|
36
37
|
|
37
38
|
|
38
|
-
def generate_pytorch_tpc(name: str, tp_model:
|
39
|
+
def generate_pytorch_tpc(name: str, tp_model: schema.TargetPlatformModel):
|
39
40
|
"""
|
40
41
|
Generates a TargetPlatformCapabilities object with default operation sets to layers mapping.
|
41
42
|
Args:
|
@@ -44,9 +45,7 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
44
45
|
Returns: a TargetPlatformCapabilities object for the given TargetPlatformModel.
|
45
46
|
"""
|
46
47
|
|
47
|
-
pytorch_tpc = tp.TargetPlatformCapabilities(tp_model
|
48
|
-
name=name,
|
49
|
-
version=TPC_VERSION)
|
48
|
+
pytorch_tpc = tp.TargetPlatformCapabilities(tp_model)
|
50
49
|
|
51
50
|
# we provide attributes mapping that maps each layer type in the operations set
|
52
51
|
# that has weights attributes with provided quantization config (in the tp model) to
|