mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +102 -104
- model_compression_toolkit/__init__.py +2 -2
- model_compression_toolkit/core/common/framework_info.py +1 -3
- model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
- model_compression_toolkit/core/common/graph/base_graph.py +20 -21
- model_compression_toolkit/core/common/graph/base_node.py +44 -17
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +187 -0
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +35 -162
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +668 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +74 -51
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/pruner.py +5 -3
- model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
- model_compression_toolkit/core/graph_prep_runner.py +12 -11
- model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
- model_compression_toolkit/core/runner.py +33 -60
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/metadata.py +11 -10
- model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
- model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
- model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
- model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
- model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -528
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
@@ -16,36 +16,36 @@ from typing import List, Tuple
|
|
16
16
|
|
17
17
|
import model_compression_toolkit as mct
|
18
18
|
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
19
|
+
from mct_quantizers import QuantizationMethod
|
19
20
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
20
21
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, QNNPACK_TP_MODEL
|
21
|
-
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import
|
22
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities, \
|
22
23
|
Signedness, \
|
23
24
|
AttributeQuantizationConfig, OpQuantizationConfig
|
24
25
|
|
25
|
-
tp = mct.target_platform
|
26
26
|
|
27
27
|
|
28
|
-
def
|
28
|
+
def get_tpc() -> TargetPlatformCapabilities:
|
29
29
|
"""
|
30
30
|
A method that generates a default target platform model, with base 8-bit quantization configuration and 8, 4, 2
|
31
31
|
bits configuration list for mixed-precision quantization.
|
32
32
|
NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
|
33
33
|
(for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
|
34
|
-
'get_op_quantization_configs' method and use its output to call '
|
34
|
+
'get_op_quantization_configs' method and use its output to call 'generate_tpc' with your configurations.
|
35
35
|
|
36
|
-
Returns: A
|
36
|
+
Returns: A TargetPlatformCapabilities object.
|
37
37
|
|
38
38
|
"""
|
39
39
|
base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
|
40
|
-
return
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
return generate_tpc(default_config=default_config,
|
41
|
+
base_config=base_config,
|
42
|
+
mixed_precision_cfg_list=mixed_precision_cfg_list,
|
43
|
+
name='qnnpack_tpc')
|
44
44
|
|
45
45
|
|
46
46
|
def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
|
47
47
|
"""
|
48
|
-
Creates a default configuration object for 8-bit quantization, to be used to set a default
|
48
|
+
Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformCapabilities.
|
49
49
|
In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
|
50
50
|
default configuration for mixed-precision quantization.
|
51
51
|
|
@@ -55,7 +55,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
55
55
|
|
56
56
|
# We define a default quantization config for all non-specified weights attributes.
|
57
57
|
default_weight_attr_config = AttributeQuantizationConfig(
|
58
|
-
weights_quantization_method=
|
58
|
+
weights_quantization_method=QuantizationMethod.SYMMETRIC,
|
59
59
|
weights_n_bits=8,
|
60
60
|
weights_per_channel_threshold=False,
|
61
61
|
enable_weights_quantization=False,
|
@@ -63,7 +63,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
63
63
|
|
64
64
|
# We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
|
65
65
|
kernel_base_config = AttributeQuantizationConfig(
|
66
|
-
weights_quantization_method=
|
66
|
+
weights_quantization_method=QuantizationMethod.SYMMETRIC,
|
67
67
|
weights_n_bits=8,
|
68
68
|
weights_per_channel_threshold=False,
|
69
69
|
enable_weights_quantization=True,
|
@@ -71,7 +71,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
71
71
|
|
72
72
|
# We define a quantization config to quantize the bias (for layers where there is a bias attribute).
|
73
73
|
bias_config = AttributeQuantizationConfig(
|
74
|
-
weights_quantization_method=
|
74
|
+
weights_quantization_method=QuantizationMethod.SYMMETRIC,
|
75
75
|
weights_n_bits=FLOAT_BITWIDTH,
|
76
76
|
weights_per_channel_threshold=False,
|
77
77
|
enable_weights_quantization=False,
|
@@ -88,7 +88,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
88
88
|
eight_bits_default = schema.OpQuantizationConfig(
|
89
89
|
default_weight_attr_config=default_weight_attr_config,
|
90
90
|
attr_weights_configs_mapping={},
|
91
|
-
activation_quantization_method=
|
91
|
+
activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
|
92
92
|
activation_n_bits=8,
|
93
93
|
supported_input_activation_n_bits=8,
|
94
94
|
enable_activation_quantization=True,
|
@@ -100,7 +100,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
100
100
|
|
101
101
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
102
102
|
linear_eight_bits = schema.OpQuantizationConfig(
|
103
|
-
activation_quantization_method=
|
103
|
+
activation_quantization_method=QuantizationMethod.UNIFORM,
|
104
104
|
default_weight_attr_config=default_weight_attr_config,
|
105
105
|
attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
|
106
106
|
activation_n_bits=8,
|
@@ -117,22 +117,22 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
117
117
|
return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
|
118
118
|
|
119
119
|
|
120
|
-
def
|
121
|
-
|
122
|
-
|
123
|
-
|
120
|
+
def generate_tpc(default_config: OpQuantizationConfig,
|
121
|
+
base_config: OpQuantizationConfig,
|
122
|
+
mixed_precision_cfg_list: List[OpQuantizationConfig],
|
123
|
+
name: str) -> TargetPlatformCapabilities:
|
124
124
|
"""
|
125
|
-
Generates
|
125
|
+
Generates TargetPlatformCapabilities with default defined Operators Sets, based on the given base configuration and
|
126
126
|
mixed-precision configurations options list.
|
127
127
|
|
128
128
|
Args
|
129
129
|
default_config: A default OpQuantizationConfig to set as the TP model default configuration.
|
130
|
-
base_config: An OpQuantizationConfig to set as the
|
130
|
+
base_config: An OpQuantizationConfig to set as the TargetPlatformCapabilities base configuration for mixed-precision purposes only.
|
131
131
|
mixed_precision_cfg_list: A list of OpQuantizationConfig to be used as the TP model mixed-precision
|
132
132
|
quantization configuration options.
|
133
|
-
name: The name of the
|
133
|
+
name: The name of the TargetPlatformCapabilities.
|
134
134
|
|
135
|
-
Returns: A
|
135
|
+
Returns: A TargetPlatformCapabilities object.
|
136
136
|
|
137
137
|
"""
|
138
138
|
# Create a QuantizationConfigOptions, which defines a set
|
@@ -160,8 +160,8 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
160
160
|
|
161
161
|
operator_set.extend([conv, conv_depthwise, conv_transpose, batchnorm, relu, relu6, hard_tanh, linear])
|
162
162
|
|
163
|
-
conv_opset_concat = schema.
|
164
|
-
relu_opset_concat = schema.
|
163
|
+
conv_opset_concat = schema.OperatorSetGroup(operators_set=[conv, conv_transpose])
|
164
|
+
relu_opset_concat = schema.OperatorSetGroup(operators_set=[relu, relu6, hard_tanh])
|
165
165
|
|
166
166
|
# ------------------- #
|
167
167
|
# Fusions
|
@@ -171,10 +171,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
171
171
|
fusing_patterns.append(schema.Fusing(operator_groups=(conv_opset_concat, relu_opset_concat)))
|
172
172
|
fusing_patterns.append(schema.Fusing(operator_groups=(linear, relu_opset_concat)))
|
173
173
|
|
174
|
-
# Create a
|
174
|
+
# Create a TargetPlatformCapabilities and set its default quantization config.
|
175
175
|
# This default configuration will be used for all operations
|
176
176
|
# unless specified otherwise (see OperatorsSet, for example):
|
177
|
-
generated_tpc = schema.
|
177
|
+
generated_tpc = schema.TargetPlatformCapabilities(
|
178
178
|
default_qco=default_configuration_options,
|
179
179
|
tpc_minor_version=1,
|
180
180
|
tpc_patch_version=0,
|
model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py
CHANGED
@@ -13,13 +13,13 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
15
|
from model_compression_toolkit.verify_packages import FOUND_TORCH, FOUND_TF
|
16
|
-
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.
|
16
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.tpc import get_tpc, generate_tpc, get_op_quantization_configs
|
17
17
|
if FOUND_TF:
|
18
|
-
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.
|
18
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.tpc import get_keras_tpc as get_keras_tpc_latest
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.tpc_models.get_target_platform_capabilities import \
|
20
20
|
get_tpc_model as generate_keras_tpc
|
21
21
|
if FOUND_TORCH:
|
22
|
-
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.
|
23
|
-
|
22
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.tpc import \
|
23
|
+
get_tpc as get_pytorch_tpc_latest
|
24
24
|
from model_compression_toolkit.target_platform_capabilities.tpc_models.get_target_platform_capabilities import \
|
25
25
|
get_tpc_model as generate_pytorch_tpc
|
@@ -16,35 +16,35 @@ from typing import List, Tuple
|
|
16
16
|
|
17
17
|
import model_compression_toolkit as mct
|
18
18
|
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
|
19
|
+
from mct_quantizers import QuantizationMethod
|
19
20
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
20
21
|
from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR, TFLITE_TP_MODEL
|
21
|
-
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import
|
22
|
+
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities, Signedness, \
|
22
23
|
AttributeQuantizationConfig, OpQuantizationConfig
|
23
24
|
|
24
|
-
tp = mct.target_platform
|
25
25
|
|
26
26
|
|
27
|
-
def
|
27
|
+
def get_tpc() -> TargetPlatformCapabilities:
|
28
28
|
"""
|
29
29
|
A method that generates a default target platform model, with base 8-bit quantization configuration and 8, 4, 2
|
30
30
|
bits configuration list for mixed-precision quantization.
|
31
31
|
NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
|
32
32
|
(for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
|
33
|
-
'get_op_quantization_configs' method and use its output to call '
|
33
|
+
'get_op_quantization_configs' method and use its output to call 'generate_tpc' with your configurations.
|
34
34
|
|
35
|
-
Returns: A
|
35
|
+
Returns: A TargetPlatformCapabilities object.
|
36
36
|
|
37
37
|
"""
|
38
38
|
base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
|
39
|
-
return
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
return generate_tpc(default_config=default_config,
|
40
|
+
base_config=base_config,
|
41
|
+
mixed_precision_cfg_list=mixed_precision_cfg_list,
|
42
|
+
name='tflite_tpc')
|
43
43
|
|
44
44
|
|
45
45
|
def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
|
46
46
|
"""
|
47
|
-
Creates a default configuration object for 8-bit quantization, to be used to set a default
|
47
|
+
Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformCapabilities.
|
48
48
|
In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
|
49
49
|
default configuration for mixed-precision quantization.
|
50
50
|
|
@@ -54,7 +54,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
54
54
|
|
55
55
|
# We define a default quantization config for all non-specified weights attributes.
|
56
56
|
default_weight_attr_config = AttributeQuantizationConfig(
|
57
|
-
weights_quantization_method=
|
57
|
+
weights_quantization_method=QuantizationMethod.SYMMETRIC,
|
58
58
|
weights_n_bits=8,
|
59
59
|
weights_per_channel_threshold=False,
|
60
60
|
enable_weights_quantization=False,
|
@@ -62,7 +62,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
62
62
|
|
63
63
|
# We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
|
64
64
|
kernel_base_config = AttributeQuantizationConfig(
|
65
|
-
weights_quantization_method=
|
65
|
+
weights_quantization_method=QuantizationMethod.SYMMETRIC,
|
66
66
|
weights_n_bits=8,
|
67
67
|
weights_per_channel_threshold=True,
|
68
68
|
enable_weights_quantization=True,
|
@@ -70,7 +70,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
70
70
|
|
71
71
|
# We define a quantization config to quantize the bias (for layers where there is a bias attribute).
|
72
72
|
bias_config = AttributeQuantizationConfig(
|
73
|
-
weights_quantization_method=
|
73
|
+
weights_quantization_method=QuantizationMethod.SYMMETRIC,
|
74
74
|
weights_n_bits=FLOAT_BITWIDTH,
|
75
75
|
weights_per_channel_threshold=False,
|
76
76
|
enable_weights_quantization=False,
|
@@ -85,7 +85,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
85
85
|
eight_bits_default = schema.OpQuantizationConfig(
|
86
86
|
default_weight_attr_config=default_weight_attr_config,
|
87
87
|
attr_weights_configs_mapping={},
|
88
|
-
activation_quantization_method=
|
88
|
+
activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
|
89
89
|
activation_n_bits=8,
|
90
90
|
supported_input_activation_n_bits=8,
|
91
91
|
enable_activation_quantization=True,
|
@@ -97,7 +97,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
97
97
|
|
98
98
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
99
99
|
linear_eight_bits = schema.OpQuantizationConfig(
|
100
|
-
activation_quantization_method=
|
100
|
+
activation_quantization_method=QuantizationMethod.UNIFORM,
|
101
101
|
default_weight_attr_config=default_weight_attr_config,
|
102
102
|
attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
|
103
103
|
activation_n_bits=8,
|
@@ -114,22 +114,22 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
114
114
|
return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
|
115
115
|
|
116
116
|
|
117
|
-
def
|
118
|
-
|
119
|
-
|
120
|
-
|
117
|
+
def generate_tpc(default_config: OpQuantizationConfig,
|
118
|
+
base_config: OpQuantizationConfig,
|
119
|
+
mixed_precision_cfg_list: List[OpQuantizationConfig],
|
120
|
+
name: str) -> TargetPlatformCapabilities:
|
121
121
|
"""
|
122
|
-
Generates
|
122
|
+
Generates TargetPlatformCapabilities with default defined Operators Sets, based on the given base configuration and
|
123
123
|
mixed-precision configurations options list.
|
124
124
|
|
125
125
|
Args
|
126
126
|
default_config: A default OpQuantizationConfig to set as the TP model default configuration.
|
127
|
-
base_config: An OpQuantizationConfig to set as the
|
127
|
+
base_config: An OpQuantizationConfig to set as the TargetPlatformCapabilities base configuration for mixed-precision purposes only.
|
128
128
|
mixed_precision_cfg_list: A list of OpQuantizationConfig to be used as the TP model mixed-precision
|
129
129
|
quantization configuration options.
|
130
|
-
name: The name of the
|
130
|
+
name: The name of the TargetPlatformCapabilities.
|
131
131
|
|
132
|
-
Returns: A
|
132
|
+
Returns: A TargetPlatformCapabilities object.
|
133
133
|
|
134
134
|
"""
|
135
135
|
# Create a QuantizationConfigOptions, which defines a set
|
@@ -193,8 +193,8 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
193
193
|
add = schema.OperatorsSet(name=schema.OperatorSetNames.ADD)
|
194
194
|
bias_add = schema.OperatorsSet(name=schema.OperatorSetNames.ADD_BIAS)
|
195
195
|
|
196
|
-
kernel = schema.
|
197
|
-
activations_to_fuse = schema.
|
196
|
+
kernel = schema.OperatorSetGroup(operators_set=[conv2d, fc])
|
197
|
+
activations_to_fuse = schema.OperatorSetGroup(operators_set=[relu, elu])
|
198
198
|
|
199
199
|
operator_set.extend([fc, conv2d, relu, relu6, tanh, sigmoid, batch_norm, add, bias_add, elu, squeeze])
|
200
200
|
|
@@ -209,10 +209,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
209
209
|
fusing_patterns.append(schema.Fusing(operator_groups=(batch_norm, activations_to_fuse)))
|
210
210
|
fusing_patterns.append(schema.Fusing(operator_groups=(batch_norm, add, activations_to_fuse)))
|
211
211
|
|
212
|
-
# Create a
|
212
|
+
# Create a TargetPlatformCapabilities and set its default quantization config.
|
213
213
|
# This default configuration will be used for all operations
|
214
214
|
# unless specified otherwise (see OperatorsSet, for example):
|
215
|
-
generated_tpc = schema.
|
215
|
+
generated_tpc = schema.TargetPlatformCapabilities(
|
216
216
|
default_qco=default_configuration_options,
|
217
217
|
tpc_minor_version=1,
|
218
218
|
tpc_patch_version=0,
|
@@ -15,8 +15,7 @@
|
|
15
15
|
from typing import Union, Any
|
16
16
|
|
17
17
|
from model_compression_toolkit.logger import Logger
|
18
|
-
from
|
19
|
-
from mct_quantizers import QuantizationTarget
|
18
|
+
from mct_quantizers import QuantizationTarget, QuantizationMethod
|
20
19
|
from mct_quantizers.common.constants \
|
21
20
|
import QUANTIZATION_TARGET, QUANTIZATION_METHOD, QUANTIZER_ID
|
22
21
|
from mct_quantizers.common.get_all_subclasses \
|
@@ -14,7 +14,8 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
from abc import ABC
|
16
16
|
from typing import Dict, List
|
17
|
-
|
17
|
+
|
18
|
+
from mct_quantizers import QuantizationMethod
|
18
19
|
|
19
20
|
|
20
21
|
class TrainableQuantizerCandidateConfig:
|
model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py
CHANGED
@@ -23,9 +23,8 @@ from model_compression_toolkit.constants import SIGNED
|
|
23
23
|
|
24
24
|
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
25
25
|
|
26
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
27
26
|
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
28
|
-
from mct_quantizers import QuantizationTarget, mark_quantizer
|
27
|
+
from mct_quantizers import QuantizationTarget, mark_quantizer, QuantizationMethod
|
29
28
|
from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
|
30
29
|
from model_compression_toolkit import constants as C
|
31
30
|
|
@@ -19,7 +19,7 @@ from enum import Enum
|
|
19
19
|
|
20
20
|
import numpy as np
|
21
21
|
|
22
|
-
from
|
22
|
+
from mct_quantizers import QuantizationMethod
|
23
23
|
from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
|
24
24
|
TrainableQuantizerActivationConfig, TrainableQuantizerWeightsConfig
|
25
25
|
from mct_quantizers.common import constants as C
|
@@ -23,7 +23,8 @@ from model_compression_toolkit.core.graph_prep_runner import graph_preparation_r
|
|
23
23
|
from typing import Any, Callable
|
24
24
|
|
25
25
|
from model_compression_toolkit.core.common import Graph
|
26
|
-
from model_compression_toolkit.target_platform_capabilities.
|
26
|
+
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import \
|
27
|
+
FrameworkQuantizationCapabilities
|
27
28
|
|
28
29
|
|
29
30
|
class ModelFoldingUtils:
|
@@ -35,19 +36,19 @@ class ModelFoldingUtils:
|
|
35
36
|
def __init__(self,
|
36
37
|
fw_info: FrameworkInfo,
|
37
38
|
fw_impl: FrameworkImplementation,
|
38
|
-
|
39
|
+
fw_default_fqc: FrameworkQuantizationCapabilities):
|
39
40
|
"""
|
40
41
|
Initialize the ModelFoldingUtils class with framework-specific information, implementation details,
|
41
|
-
and default
|
42
|
+
and default FQC.
|
42
43
|
|
43
44
|
Args:
|
44
45
|
fw_info: Framework-specific information.
|
45
46
|
fw_impl: Implementation functions for the framework.
|
46
|
-
|
47
|
+
fw_default_fqc: Default target platform capabilities for the handled framework.
|
47
48
|
"""
|
48
49
|
self.fw_info = fw_info
|
49
50
|
self.fw_impl = fw_impl
|
50
|
-
self.
|
51
|
+
self.fw_default_fqc = fw_default_fqc
|
51
52
|
|
52
53
|
def create_float_folded_model(self, float_model: Any, representative_dataset: Any = None) -> Any:
|
53
54
|
"""
|
@@ -101,5 +102,5 @@ class ModelFoldingUtils:
|
|
101
102
|
fw_impl=self.fw_impl,
|
102
103
|
fw_info=self.fw_info,
|
103
104
|
quantization_config=DEFAULTCONFIG,
|
104
|
-
|
105
|
+
fqc=self.fw_default_fqc)
|
105
106
|
return graph
|
@@ -17,6 +17,8 @@ from model_compression_toolkit import get_target_platform_capabilities
|
|
17
17
|
from model_compression_toolkit.constants import TENSORFLOW
|
18
18
|
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
|
19
19
|
from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
|
20
|
+
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2keras import \
|
21
|
+
AttachTpcToKeras
|
20
22
|
from model_compression_toolkit.xquant.common.framework_report_utils import FrameworkReportUtils
|
21
23
|
from model_compression_toolkit.xquant.common.model_folding_utils import ModelFoldingUtils
|
22
24
|
from model_compression_toolkit.xquant.common.similarity_calculator import SimilarityCalculator
|
@@ -27,8 +29,6 @@ from model_compression_toolkit.xquant.keras.similarity_functions import KerasSim
|
|
27
29
|
from model_compression_toolkit.xquant.keras.tensorboard_utils import KerasTensorboardUtils
|
28
30
|
from mct_quantizers.keras.metadata import get_metadata
|
29
31
|
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
|
30
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
|
31
|
-
AttachTpcToKeras
|
32
32
|
|
33
33
|
|
34
34
|
class KerasReportUtils(FrameworkReportUtils):
|
@@ -46,12 +46,12 @@ class KerasReportUtils(FrameworkReportUtils):
|
|
46
46
|
# Set the default Target Platform Capabilities (TPC) for Keras.
|
47
47
|
default_tpc = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)
|
48
48
|
attach2pytorch = AttachTpcToKeras()
|
49
|
-
|
49
|
+
framework_platform_capabilities = attach2pytorch.attach(default_tpc)
|
50
50
|
|
51
51
|
dataset_utils = KerasDatasetUtils()
|
52
52
|
model_folding = ModelFoldingUtils(fw_info=fw_info,
|
53
53
|
fw_impl=fw_impl,
|
54
|
-
|
54
|
+
fw_default_fqc=framework_platform_capabilities)
|
55
55
|
|
56
56
|
similarity_calculator = SimilarityCalculator(dataset_utils=dataset_utils,
|
57
57
|
model_folding=model_folding,
|
@@ -16,7 +16,7 @@ from model_compression_toolkit import get_target_platform_capabilities
|
|
16
16
|
from model_compression_toolkit.constants import PYTORCH
|
17
17
|
from model_compression_toolkit.core.pytorch.utils import get_working_device
|
18
18
|
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
|
19
|
-
from model_compression_toolkit.target_platform_capabilities.
|
19
|
+
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2pytorch import \
|
20
20
|
AttachTpcToPytorch
|
21
21
|
|
22
22
|
from model_compression_toolkit.xquant.common.framework_report_utils import FrameworkReportUtils
|
@@ -44,12 +44,12 @@ class PytorchReportUtils(FrameworkReportUtils):
|
|
44
44
|
# Set the default Target Platform Capabilities (TPC) for PyTorch.
|
45
45
|
default_tpc = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
|
46
46
|
attach2pytorch = AttachTpcToPytorch()
|
47
|
-
|
47
|
+
framework_quantization_capabilities = attach2pytorch.attach(default_tpc)
|
48
48
|
|
49
49
|
dataset_utils = PytorchDatasetUtils()
|
50
50
|
model_folding = ModelFoldingUtils(fw_info=fw_info,
|
51
51
|
fw_impl=fw_impl,
|
52
|
-
|
52
|
+
fw_default_fqc=framework_quantization_capabilities)
|
53
53
|
|
54
54
|
similarity_calculator = SimilarityCalculator(dataset_utils=dataset_utils,
|
55
55
|
model_folding=model_folding,
|
@@ -1,105 +0,0 @@
|
|
1
|
-
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
# ==============================================================================
|
15
|
-
import copy
|
16
|
-
from enum import Enum
|
17
|
-
from functools import partial
|
18
|
-
from typing import List, Any
|
19
|
-
import numpy as np
|
20
|
-
|
21
|
-
from pulp import lpSum
|
22
|
-
|
23
|
-
|
24
|
-
def sum_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[Any]:
|
25
|
-
"""
|
26
|
-
Aggregates resource utilization vector to a single resource utilization measure by summing all values.
|
27
|
-
|
28
|
-
Args:
|
29
|
-
ru_vector: A vector with nodes' resource utilization values.
|
30
|
-
set_constraints: A flag for utilizing the method for resource utilization computation of a
|
31
|
-
given config not for LP formalization purposes.
|
32
|
-
|
33
|
-
Returns: A list with an lpSum object for lp problem definition with the vector's sum.
|
34
|
-
|
35
|
-
"""
|
36
|
-
if set_constraints:
|
37
|
-
return [lpSum(ru_vector)]
|
38
|
-
return [0] if len(ru_vector) == 0 else [sum(ru_vector)]
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
def max_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[float]:
|
43
|
-
"""
|
44
|
-
Aggregates resource utilization vector to allow max constraint in the linear programming problem formalization.
|
45
|
-
In order to do so, we need to define a separate constraint on each value in the resource utilization vector,
|
46
|
-
to be bounded by the target resource utilization.
|
47
|
-
|
48
|
-
Args:
|
49
|
-
ru_vector: A vector with nodes' resource utilization values.
|
50
|
-
set_constraints: A flag for utilizing the method for resource utilization computation of a
|
51
|
-
given config not for LP formalization purposes.
|
52
|
-
|
53
|
-
Returns: A list with the vector's values, to be used to define max constraint
|
54
|
-
in the linear programming problem formalization.
|
55
|
-
|
56
|
-
"""
|
57
|
-
if set_constraints:
|
58
|
-
return [ru for ru in ru_vector]
|
59
|
-
return [0] if len(ru_vector) == 0 else [max(ru_vector)]
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def total_ru(ru_tensor: np.ndarray, set_constraints: bool = True) -> List[float]:
|
64
|
-
"""
|
65
|
-
Aggregates resource utilization vector to allow weights and activation total utilization constraint in the linear programming
|
66
|
-
problem formalization. In order to do so, we need to define a separate constraint on each activation memory utilization value in
|
67
|
-
the resource utilization vector, combined with the sum weights memory utilization.
|
68
|
-
Note that the given ru_tensor should contain weights and activation utilization values in each entry.
|
69
|
-
|
70
|
-
Args:
|
71
|
-
ru_tensor: A tensor with nodes' resource utilization values for weights and activation.
|
72
|
-
set_constraints: A flag for utilizing the method for resource utilization computation of a
|
73
|
-
given config not for LP formalization purposes.
|
74
|
-
|
75
|
-
Returns: A list with lpSum objects, to be used to define total constraint
|
76
|
-
in the linear programming problem formalization.
|
77
|
-
|
78
|
-
"""
|
79
|
-
if set_constraints:
|
80
|
-
weights_ru = lpSum([ru[0] for ru in ru_tensor])
|
81
|
-
return [weights_ru + activation_ru for _, activation_ru in ru_tensor]
|
82
|
-
else:
|
83
|
-
weights_ru = sum([ru[0] for ru in ru_tensor])
|
84
|
-
activation_ru = max([ru[1] for ru in ru_tensor])
|
85
|
-
return [weights_ru + activation_ru]
|
86
|
-
|
87
|
-
|
88
|
-
class MpRuAggregation(Enum):
|
89
|
-
"""
|
90
|
-
Defines resource utilization aggregation functions that can be used to compute final resource utilization metric.
|
91
|
-
The enum values can be used to call a function on a set of arguments.
|
92
|
-
|
93
|
-
SUM - applies the sum_ru_values function
|
94
|
-
|
95
|
-
MAX - applies the max_ru_values function
|
96
|
-
|
97
|
-
TOTAL - applies the total_ru function
|
98
|
-
|
99
|
-
"""
|
100
|
-
SUM = partial(sum_ru_values)
|
101
|
-
MAX = partial(max_ru_values)
|
102
|
-
TOTAL = partial(total_ru)
|
103
|
-
|
104
|
-
def __call__(self, *args):
|
105
|
-
return self.value(*args)
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
# ==============================================================================
|
15
|
-
from typing import NamedTuple
|
16
|
-
|
17
|
-
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
|
18
|
-
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
|
19
|
-
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
|
20
|
-
|
21
|
-
|
22
|
-
# When adding a RUTarget that we want to consider in our mp search,
|
23
|
-
# a matching pair of resource_utilization_tools computation function and a resource_utilization_tools
|
24
|
-
# aggregation function should be added to this dictionary
|
25
|
-
class RuFunctions(NamedTuple):
|
26
|
-
metric_fn: MpRuMetric
|
27
|
-
aggregate_fn: MpRuAggregation
|
28
|
-
|
29
|
-
|
30
|
-
ru_functions_mapping = {RUTarget.WEIGHTS: RuFunctions(MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
|
31
|
-
RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_MAXCUT_SIZE, MpRuAggregation.MAX),
|
32
|
-
RUTarget.TOTAL: RuFunctions(MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
|
33
|
-
RUTarget.BOPS: RuFunctions(MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
|