mct-nightly 2.1.0.20240806.441__py3-none-any.whl → 2.1.0.20240808.431__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/METADATA +2 -2
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/RECORD +48 -47
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +14 -1
- model_compression_toolkit/core/common/fusion/graph_fuser.py +135 -0
- model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +4 -0
- model_compression_toolkit/core/common/quantization/debug_config.py +4 -1
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +3 -4
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +29 -1
- model_compression_toolkit/core/runner.py +21 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +13 -11
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +13 -11
- model_compression_toolkit/metadata.py +61 -2
- model_compression_toolkit/ptq/keras/quantization_facade.py +12 -10
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +12 -12
- model_compression_toolkit/qat/keras/quantization_facade.py +8 -8
- model_compression_toolkit/qat/pytorch/quantization_facade.py +8 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +2 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +18 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +10 -13
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +68 -52
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +6 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/target_platform_capabilities.py +35 -29
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +5 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/target_platform_capabilities.py +35 -28
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +5 -4
- model_compression_toolkit/xquant/common/constants.py +3 -0
- model_compression_toolkit/xquant/common/core_report_generator.py +9 -1
- model_compression_toolkit/xquant/common/framework_report_utils.py +5 -14
- model_compression_toolkit/xquant/common/tensorboard_utils.py +30 -5
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +2 -0
- model_compression_toolkit/xquant/keras/keras_report_utils.py +3 -1
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +101 -4
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +2 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -2
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +109 -3
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/top_level.txt +0 -0
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py
CHANGED
@@ -18,7 +18,7 @@ import model_compression_toolkit as mct
|
|
18
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
21
|
-
TargetPlatformModel
|
21
|
+
TargetPlatformModel, Signedness
|
22
22
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
23
|
AttributeQuantizationConfig
|
24
24
|
|
@@ -94,7 +94,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
94
94
|
quantization_preserving=False,
|
95
95
|
fixed_scale=None,
|
96
96
|
fixed_zero_point=None,
|
97
|
-
simd_size=32
|
97
|
+
simd_size=32,
|
98
|
+
signedness=Signedness.AUTO)
|
98
99
|
|
99
100
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
100
101
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -107,7 +108,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
107
108
|
quantization_preserving=False,
|
108
109
|
fixed_scale=None,
|
109
110
|
fixed_zero_point=None,
|
110
|
-
simd_size=32
|
111
|
+
simd_size=32,
|
112
|
+
signedness=Signedness.AUTO)
|
111
113
|
|
112
114
|
# To quantize a model using mixed-precision, create
|
113
115
|
# a list with more than one OpQuantizationConfig.
|
@@ -18,7 +18,7 @@ import model_compression_toolkit as mct
|
|
18
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
21
|
-
TargetPlatformModel
|
21
|
+
TargetPlatformModel, Signedness
|
22
22
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
23
|
AttributeQuantizationConfig
|
24
24
|
|
@@ -100,7 +100,8 @@ def get_op_quantization_configs() -> \
|
|
100
100
|
quantization_preserving=False,
|
101
101
|
fixed_scale=None,
|
102
102
|
fixed_zero_point=None,
|
103
|
-
simd_size=32
|
103
|
+
simd_size=32,
|
104
|
+
signedness=Signedness.AUTO)
|
104
105
|
|
105
106
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
106
107
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -113,7 +114,8 @@ def get_op_quantization_configs() -> \
|
|
113
114
|
quantization_preserving=False,
|
114
115
|
fixed_scale=None,
|
115
116
|
fixed_zero_point=None,
|
116
|
-
simd_size=32
|
117
|
+
simd_size=32,
|
118
|
+
signedness=Signedness.AUTO)
|
117
119
|
|
118
120
|
# To quantize a model using mixed-precision, create
|
119
121
|
# a list with more than one OpQuantizationConfig.
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
CHANGED
@@ -19,7 +19,7 @@ from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
|
20
20
|
WEIGHTS_QUANTIZATION_METHOD
|
21
21
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
22
|
-
TargetPlatformModel
|
22
|
+
TargetPlatformModel, Signedness
|
23
23
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
24
24
|
AttributeQuantizationConfig
|
25
25
|
|
@@ -96,7 +96,8 @@ def get_op_quantization_configs() -> \
|
|
96
96
|
quantization_preserving=False,
|
97
97
|
fixed_scale=None,
|
98
98
|
fixed_zero_point=None,
|
99
|
-
simd_size=32
|
99
|
+
simd_size=32,
|
100
|
+
signedness=Signedness.AUTO)
|
100
101
|
|
101
102
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
102
103
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -109,7 +110,8 @@ def get_op_quantization_configs() -> \
|
|
109
110
|
quantization_preserving=False,
|
110
111
|
fixed_scale=None,
|
111
112
|
fixed_zero_point=None,
|
112
|
-
simd_size=32
|
113
|
+
simd_size=32,
|
114
|
+
signedness=Signedness.AUTO)
|
113
115
|
|
114
116
|
# To quantize a model using mixed-precision, create
|
115
117
|
# a list with more than one OpQuantizationConfig.
|
@@ -18,7 +18,7 @@ import model_compression_toolkit as mct
|
|
18
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
21
|
-
TargetPlatformModel
|
21
|
+
TargetPlatformModel, Signedness
|
22
22
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
23
|
AttributeQuantizationConfig
|
24
24
|
|
@@ -100,7 +100,8 @@ def get_op_quantization_configs() -> \
|
|
100
100
|
quantization_preserving=False,
|
101
101
|
fixed_scale=None,
|
102
102
|
fixed_zero_point=None,
|
103
|
-
simd_size=32
|
103
|
+
simd_size=32,
|
104
|
+
signedness=Signedness.AUTO)
|
104
105
|
|
105
106
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
106
107
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -113,7 +114,8 @@ def get_op_quantization_configs() -> \
|
|
113
114
|
quantization_preserving=False,
|
114
115
|
fixed_scale=None,
|
115
116
|
fixed_zero_point=None,
|
116
|
-
simd_size=32
|
117
|
+
simd_size=32,
|
118
|
+
signedness=Signedness.AUTO)
|
117
119
|
|
118
120
|
# To quantize a model using mixed-precision, create
|
119
121
|
# a list with more than one OpQuantizationConfig.
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py
CHANGED
@@ -19,7 +19,7 @@ from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS, \
|
20
20
|
WEIGHTS_QUANTIZATION_METHOD
|
21
21
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
22
|
-
TargetPlatformModel
|
22
|
+
TargetPlatformModel, Signedness
|
23
23
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
24
24
|
AttributeQuantizationConfig
|
25
25
|
|
@@ -96,7 +96,8 @@ def get_op_quantization_configs() -> \
|
|
96
96
|
quantization_preserving=False,
|
97
97
|
fixed_scale=None,
|
98
98
|
fixed_zero_point=None,
|
99
|
-
simd_size=32
|
99
|
+
simd_size=32,
|
100
|
+
signedness=Signedness.AUTO)
|
100
101
|
|
101
102
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
102
103
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -109,7 +110,8 @@ def get_op_quantization_configs() -> \
|
|
109
110
|
quantization_preserving=False,
|
110
111
|
fixed_scale=None,
|
111
112
|
fixed_zero_point=None,
|
112
|
-
simd_size=32
|
113
|
+
simd_size=32,
|
114
|
+
signedness=Signedness.AUTO)
|
113
115
|
|
114
116
|
# To quantize a model using mixed-precision, create
|
115
117
|
# a list with more than one OpQuantizationConfig.
|
@@ -18,7 +18,7 @@ import model_compression_toolkit as mct
|
|
18
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
21
|
-
TargetPlatformModel
|
21
|
+
TargetPlatformModel, Signedness
|
22
22
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
23
|
AttributeQuantizationConfig
|
24
24
|
|
@@ -100,7 +100,8 @@ def get_op_quantization_configs() -> \
|
|
100
100
|
quantization_preserving=False,
|
101
101
|
fixed_scale=None,
|
102
102
|
fixed_zero_point=None,
|
103
|
-
simd_size=32
|
103
|
+
simd_size=32,
|
104
|
+
signedness=Signedness.AUTO)
|
104
105
|
|
105
106
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
106
107
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -113,7 +114,8 @@ def get_op_quantization_configs() -> \
|
|
113
114
|
quantization_preserving=False,
|
114
115
|
fixed_scale=None,
|
115
116
|
fixed_zero_point=None,
|
116
|
-
simd_size=32
|
117
|
+
simd_size=32,
|
118
|
+
signedness=Signedness.AUTO)
|
117
119
|
|
118
120
|
# To quantize a model using mixed-precision, create
|
119
121
|
# a list with more than one OpQuantizationConfig.
|
@@ -170,7 +172,7 @@ def generate_tp_model(default_config: OpQuantizationConfig,
|
|
170
172
|
const_config_input16 = const_config.clone_and_edit(
|
171
173
|
supported_input_activation_n_bits=(8, 16))
|
172
174
|
const_config_input16_output16 = const_config_input16.clone_and_edit(
|
173
|
-
activation_n_bits=16,
|
175
|
+
activation_n_bits=16, signedness=Signedness.SIGNED)
|
174
176
|
const_configuration_options_inout16 = tp.QuantizationConfigOptions([const_config_input16_output16,
|
175
177
|
const_config_input16],
|
176
178
|
base_config=const_config_input16)
|
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py
CHANGED
@@ -17,7 +17,7 @@ import operator
|
|
17
17
|
|
18
18
|
import torch
|
19
19
|
from torch import add, sub, mul, div, flatten, reshape, split, unsqueeze, dropout, sigmoid, tanh, chunk, unbind, topk, \
|
20
|
-
gather, equal, transpose, permute, argmax, squeeze
|
20
|
+
gather, equal, transpose, permute, argmax, squeeze, multiply, subtract
|
21
21
|
from torch.nn import Conv2d, Linear, ConvTranspose2d, MaxPool2d
|
22
22
|
from torch.nn import Dropout, Flatten, Hardtanh, Identity
|
23
23
|
from torch.nn import ReLU, ReLU6, PReLU, SiLU, Sigmoid, Tanh, Hardswish, LeakyReLU
|
@@ -101,8 +101,8 @@ def generate_pytorch_tpc(name: str, tp_model: tp.TargetPlatformModel):
|
|
101
101
|
tp.LayerFilterParams(hardtanh, min_val=0)])
|
102
102
|
|
103
103
|
tp.OperationsSetToLayers("Add", [operator.add, add])
|
104
|
-
tp.OperationsSetToLayers("Sub", [operator.sub, sub])
|
105
|
-
tp.OperationsSetToLayers("Mul", [operator.mul, mul])
|
104
|
+
tp.OperationsSetToLayers("Sub", [operator.sub, sub, subtract])
|
105
|
+
tp.OperationsSetToLayers("Mul", [operator.mul, mul, multiply])
|
106
106
|
tp.OperationsSetToLayers("Div", [operator.truediv, div])
|
107
107
|
tp.OperationsSetToLayers("PReLU", [PReLU, prelu])
|
108
108
|
tp.OperationsSetToLayers("Swish", [SiLU, silu, Hardswish, hardswish])
|
@@ -14,35 +14,41 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
|
16
16
|
from model_compression_toolkit.constants import FOUND_TF, FOUND_TORCH, TENSORFLOW, PYTORCH
|
17
|
+
from model_compression_toolkit.logger import Logger
|
17
18
|
from model_compression_toolkit.target_platform_capabilities.constants import LATEST
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
###############################
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
20
|
+
def get_tpc_dict_by_fw(fw_name):
|
21
|
+
tpc_models_dict = None
|
22
|
+
if fw_name == TENSORFLOW:
|
23
|
+
###############################
|
24
|
+
# Build Tensorflow TPC models
|
25
|
+
###############################
|
26
|
+
if FOUND_TF:
|
27
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.v1.tpc_keras import \
|
28
|
+
get_keras_tpc as get_keras_tpc_v1
|
29
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.latest import \
|
30
|
+
get_keras_tpc_latest
|
31
|
+
|
32
|
+
# Keras: TPC versioning
|
33
|
+
tpc_models_dict = {'v1': get_keras_tpc_v1,
|
34
|
+
LATEST: get_keras_tpc_latest}
|
35
|
+
elif fw_name == PYTORCH:
|
36
|
+
###############################
|
37
|
+
# Build Pytorch TPC models
|
38
|
+
###############################
|
39
|
+
if FOUND_TORCH:
|
40
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.v1.tpc_pytorch import \
|
41
|
+
get_pytorch_tpc as get_pytorch_tpc_v1
|
42
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.qnnpack_tpc.latest import \
|
43
|
+
get_pytorch_tpc_latest
|
44
|
+
|
45
|
+
# Pytorch: TPC versioning
|
46
|
+
tpc_models_dict = {'v1': get_pytorch_tpc_v1,
|
43
47
|
LATEST: get_pytorch_tpc_latest}
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
48
|
+
if tpc_models_dict is not None:
|
49
|
+
return tpc_models_dict
|
50
|
+
else:
|
51
|
+
Logger.critical(f'Framework {fw_name} is not supported in imx500 or the relevant packages are not '
|
52
|
+
f'installed. Please make sure the relevant packages are installed when using MCT for optimizing'
|
53
|
+
f' a {fw_name} model. For Tensorflow, please install tensorflow. For PyTorch, please install '
|
54
|
+
f'torch.') # pragma: no cover
|
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py
CHANGED
@@ -18,7 +18,7 @@ import model_compression_toolkit as mct
|
|
18
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
21
|
-
TargetPlatformModel
|
21
|
+
TargetPlatformModel, Signedness
|
22
22
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
23
|
AttributeQuantizationConfig
|
24
24
|
|
@@ -95,7 +95,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
95
95
|
quantization_preserving=False,
|
96
96
|
fixed_scale=None,
|
97
97
|
fixed_zero_point=None,
|
98
|
-
simd_size=32
|
98
|
+
simd_size=32,
|
99
|
+
signedness=Signedness.AUTO)
|
99
100
|
|
100
101
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
101
102
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -108,8 +109,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
108
109
|
quantization_preserving=False,
|
109
110
|
fixed_scale=None,
|
110
111
|
fixed_zero_point=None,
|
111
|
-
simd_size=None
|
112
|
-
|
112
|
+
simd_size=None,
|
113
|
+
signedness=Signedness.AUTO)
|
113
114
|
|
114
115
|
mixed_precision_cfg_list = [] # No mixed precision
|
115
116
|
|
@@ -14,34 +14,41 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
|
16
16
|
from model_compression_toolkit.constants import FOUND_TF, FOUND_TORCH, TENSORFLOW, PYTORCH
|
17
|
+
from model_compression_toolkit.logger import Logger
|
17
18
|
from model_compression_toolkit.target_platform_capabilities.constants import LATEST
|
18
19
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
###############################
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
20
|
+
def get_tpc_dict_by_fw(fw_name):
|
21
|
+
tpc_models_dict = None
|
22
|
+
if fw_name == TENSORFLOW:
|
23
|
+
###############################
|
24
|
+
# Build Tensorflow TPC models
|
25
|
+
###############################
|
26
|
+
if FOUND_TF:
|
27
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.tpc_keras import \
|
28
|
+
get_keras_tpc as get_keras_tpc_v1
|
29
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.latest import \
|
30
|
+
get_keras_tpc_latest
|
31
|
+
|
32
|
+
# Keras: TPC versioning
|
33
|
+
tpc_models_dict = {'v1': get_keras_tpc_v1,
|
34
|
+
LATEST: get_keras_tpc_latest}
|
35
|
+
elif fw_name == PYTORCH:
|
36
|
+
###############################
|
37
|
+
# Build Pytorch TPC models
|
38
|
+
###############################
|
39
|
+
if FOUND_TORCH:
|
40
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.v1.tpc_pytorch import \
|
41
|
+
get_pytorch_tpc as get_pytorch_tpc_v1
|
42
|
+
from model_compression_toolkit.target_platform_capabilities.tpc_models.tflite_tpc.latest import \
|
43
|
+
get_pytorch_tpc_latest
|
44
|
+
|
45
|
+
# Pytorch: TPC versioning
|
46
|
+
tpc_models_dict = {'v1': get_pytorch_tpc_v1,
|
43
47
|
LATEST: get_pytorch_tpc_latest}
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
+
if tpc_models_dict is not None:
|
49
|
+
return tpc_models_dict
|
50
|
+
else:
|
51
|
+
Logger.critical(f'Framework {fw_name} is not supported in imx500 or the relevant packages are not '
|
52
|
+
f'installed. Please make sure the relevant packages are installed when using MCT for optimizing'
|
53
|
+
f' a {fw_name} model. For Tensorflow, please install tensorflow. For PyTorch, please install '
|
54
|
+
f'torch.') # pragma: no cover
|
@@ -18,7 +18,7 @@ import model_compression_toolkit as mct
|
|
18
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
19
19
|
from model_compression_toolkit.target_platform_capabilities.constants import BIAS_ATTR, KERNEL_ATTR
|
20
20
|
from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
|
21
|
-
TargetPlatformModel
|
21
|
+
TargetPlatformModel, Signedness
|
22
22
|
from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
|
23
23
|
QuantizationMethod, AttributeQuantizationConfig
|
24
24
|
|
@@ -93,7 +93,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
93
93
|
quantization_preserving=False,
|
94
94
|
fixed_scale=None,
|
95
95
|
fixed_zero_point=None,
|
96
|
-
simd_size=32
|
96
|
+
simd_size=32,
|
97
|
+
signedness=Signedness.AUTO)
|
97
98
|
|
98
99
|
# We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
|
99
100
|
linear_eight_bits = tp.OpQuantizationConfig(
|
@@ -106,8 +107,8 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
|
|
106
107
|
quantization_preserving=False,
|
107
108
|
fixed_scale=None,
|
108
109
|
fixed_zero_point=None,
|
109
|
-
simd_size=None
|
110
|
-
|
110
|
+
simd_size=None,
|
111
|
+
signedness=Signedness.AUTO)
|
111
112
|
|
112
113
|
mixed_precision_cfg_list = [] # No mixed precision
|
113
114
|
|
@@ -27,6 +27,8 @@ INTERMEDIATE_SIMILARITY_METRICS_VAL = 'intermediate_similarity_metrics_val'
|
|
27
27
|
# Graph attribute names:
|
28
28
|
XQUANT_REPR = 'xquant_repr'
|
29
29
|
XQUANT_VAL = 'xquant_val'
|
30
|
+
CUT_MEMORY_ELEMENTS = 'cut_memory_elements'
|
31
|
+
CUT_TOTAL_SIZE = 'cut_total_size'
|
30
32
|
|
31
33
|
# Report file name:
|
32
34
|
REPORT_FILENAME = 'quant_report.json'
|
@@ -36,3 +38,4 @@ TENSORBOARD_DEFAULT_TAG = 'xquant'
|
|
36
38
|
|
37
39
|
# When extracting the activations of a model we hold the output using a dedicated key:
|
38
40
|
MODEL_OUTPUT_KEY = 'model_output_key'
|
41
|
+
|
@@ -45,6 +45,9 @@ def core_report_generator(float_model: Any,
|
|
45
45
|
Returns:
|
46
46
|
Dict[str, Any]: A dictionary containing the collected similarity metrics and report data.
|
47
47
|
"""
|
48
|
+
# Get metadata from the quantized model
|
49
|
+
quantized_model_metadata = fw_report_utils.get_metadata_fn(quantized_model)
|
50
|
+
|
48
51
|
# Collect histograms on the float model.
|
49
52
|
float_graph = fw_report_utils.model_folding_utils.create_float_folded_graph(float_model, repr_dataset)
|
50
53
|
mi = ModelCollector(float_graph, fw_report_utils.fw_impl, fw_report_utils.fw_info)
|
@@ -74,7 +77,12 @@ def core_report_generator(float_model: Any,
|
|
74
77
|
# Add a graph of the quantized model with the similarity metrics to TensorBoard for visualization.
|
75
78
|
fw_report_utils.tb_utils.add_graph_to_tensorboard(quantized_model,
|
76
79
|
similarity_metrics,
|
77
|
-
repr_dataset
|
80
|
+
repr_dataset,
|
81
|
+
quantized_model_metadata)
|
82
|
+
|
83
|
+
# Adds text information (like max cut and output similarity metrics) to the tensorboard writer.
|
84
|
+
fw_report_utils.tb_utils.add_text_information(similarity_metrics,
|
85
|
+
quantized_model_metadata)
|
78
86
|
|
79
87
|
# Save data to a json file.
|
80
88
|
fw_report_utils.dump_report_to_json(report_dir=xquant_config.report_dir,
|
@@ -18,7 +18,7 @@ import os
|
|
18
18
|
|
19
19
|
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
20
20
|
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
|
21
|
-
from typing import Any, Dict
|
21
|
+
from typing import Any, Dict, Callable
|
22
22
|
|
23
23
|
from model_compression_toolkit.xquant.common.constants import REPORT_FILENAME
|
24
24
|
from model_compression_toolkit.xquant.common.dataset_utils import DatasetUtils
|
@@ -39,7 +39,8 @@ class FrameworkReportUtils:
|
|
39
39
|
similarity_calculator: SimilarityCalculator,
|
40
40
|
dataset_utils: DatasetUtils,
|
41
41
|
model_folding_utils: ModelFoldingUtils,
|
42
|
-
tb_utils: TensorboardUtils
|
42
|
+
tb_utils: TensorboardUtils,
|
43
|
+
get_metadata_fn: Callable):
|
43
44
|
"""
|
44
45
|
Initializes the FrameworkReportUtils class with various utility components required for generating the report.
|
45
46
|
|
@@ -50,6 +51,7 @@ class FrameworkReportUtils:
|
|
50
51
|
dataset_utils (DatasetUtils): Utilities for handling datasets.
|
51
52
|
model_folding_utils (ModelFoldingUtils): Utilities for model folding operations.
|
52
53
|
tb_utils (TensorboardUtils): Utilities for TensorBoard operations.
|
54
|
+
get_metadata_fn (Callable): Function to retrieve the metadata from the quantized model.
|
53
55
|
"""
|
54
56
|
self.fw_info = fw_info
|
55
57
|
self.fw_impl = fw_impl
|
@@ -57,18 +59,7 @@ class FrameworkReportUtils:
|
|
57
59
|
self.dataset_utils = dataset_utils
|
58
60
|
self.model_folding_utils = model_folding_utils
|
59
61
|
self.tb_utils = tb_utils
|
60
|
-
|
61
|
-
def create_report_directory(self, dir_path: str):
|
62
|
-
"""
|
63
|
-
Create a directory for saving reports.
|
64
|
-
|
65
|
-
Args:
|
66
|
-
dir_path (str): The path to the directory to create.
|
67
|
-
|
68
|
-
"""
|
69
|
-
if not os.path.exists(dir_path):
|
70
|
-
os.makedirs(dir_path, exist_ok=True)
|
71
|
-
Logger.info(f"Directory created at: {dir_path}")
|
62
|
+
self.get_metadata_fn = get_metadata_fn
|
72
63
|
|
73
64
|
def dump_report_to_json(self,
|
74
65
|
report_dir: str,
|
@@ -12,18 +12,20 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
-
|
15
|
+
from model_compression_toolkit.constants import MAX_CUT
|
16
16
|
from model_compression_toolkit.core.common import Graph
|
17
17
|
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
18
18
|
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
|
19
19
|
|
20
20
|
|
21
21
|
from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter
|
22
|
-
from model_compression_toolkit.xquant.common.constants import TENSORBOARD_DEFAULT_TAG
|
22
|
+
from model_compression_toolkit.xquant.common.constants import TENSORBOARD_DEFAULT_TAG, OUTPUT_SIMILARITY_METRICS_REPR, \
|
23
|
+
OUTPUT_SIMILARITY_METRICS_VAL
|
23
24
|
from model_compression_toolkit.logger import Logger
|
24
25
|
|
25
26
|
|
26
27
|
from typing import Any, Dict, Callable
|
28
|
+
from mct_quantizers.keras.metadata import get_metadata
|
27
29
|
|
28
30
|
|
29
31
|
class TensorboardUtils:
|
@@ -52,7 +54,8 @@ class TensorboardUtils:
|
|
52
54
|
def get_graph_for_tensorboard_display(self,
|
53
55
|
quantized_model: Any,
|
54
56
|
similarity_metrics: Dict[str, Any],
|
55
|
-
repr_dataset: Callable
|
57
|
+
repr_dataset: Callable,
|
58
|
+
quantized_model_metadata: Dict) -> Graph:
|
56
59
|
"""
|
57
60
|
Get the graph for Tensorboard display. The framework-specific implementations
|
58
61
|
(like KerasTensorboardUtils and PytorchTensorboardUtils) should implement this
|
@@ -62,6 +65,7 @@ class TensorboardUtils:
|
|
62
65
|
quantized_model (Any): The quantized model.
|
63
66
|
similarity_metrics (Dict[str, Any]): Metrics for model similarity.
|
64
67
|
repr_dataset (Callable): Representative dataset function.
|
68
|
+
quantized_model_metadata (Dict): Metadata from the quantized model.
|
65
69
|
|
66
70
|
Returns:
|
67
71
|
Graph: The generated graph for Tensorboard display.
|
@@ -81,7 +85,8 @@ class TensorboardUtils:
|
|
81
85
|
def add_graph_to_tensorboard(self,
|
82
86
|
quantized_model: Any,
|
83
87
|
similarity_metrics: Dict[str, Any],
|
84
|
-
repr_dataset: Callable
|
88
|
+
repr_dataset: Callable,
|
89
|
+
quantized_model_metadata: Dict):
|
85
90
|
"""
|
86
91
|
Add a graph to Tensorboard. The graph represents the quantized graph
|
87
92
|
with the similarity metrics that were measured in different nodes.
|
@@ -90,12 +95,32 @@ class TensorboardUtils:
|
|
90
95
|
quantized_model (Any): The quantized model.
|
91
96
|
similarity_metrics (Dict[str, Any]): The similarity metrics that were collected.
|
92
97
|
repr_dataset (Callable): Representative dataset to use (if needed, like in pytorch case).
|
98
|
+
quantized_model_metadata (Dict): Metadata from the quantized model.
|
93
99
|
"""
|
94
100
|
# Generate the quantized graph with similarity metrics.
|
95
101
|
tb_graph = self.get_graph_for_tensorboard_display(quantized_model=quantized_model,
|
96
102
|
similarity_metrics=similarity_metrics,
|
97
|
-
repr_dataset=repr_dataset
|
103
|
+
repr_dataset=repr_dataset,
|
104
|
+
quantized_model_metadata=quantized_model_metadata)
|
98
105
|
|
99
106
|
self.tb_writer.add_graph(tb_graph, TENSORBOARD_DEFAULT_TAG)
|
100
107
|
|
108
|
+
def add_text_information(self,
|
109
|
+
similarity_metrics: Dict[str, Dict[str, float]],
|
110
|
+
quantized_model_metadata: Dict[str, Any]):
|
111
|
+
"""
|
112
|
+
Adds text information (like max cut and output similarity metrics) to the tensorboard writer.
|
101
113
|
|
114
|
+
Args:
|
115
|
+
similarity_metrics (Dict[str, Dict[str, float]]): A dictionary containing similarity metrics between quantized and float models for both representative and validation datasets.
|
116
|
+
quantized_model_metadata (Dict): Metadata from the quantized model.
|
117
|
+
"""
|
118
|
+
# Add the computed max cut
|
119
|
+
maxcut_str = f"MaxCut: {quantized_model_metadata['scheduling_info'][MAX_CUT]}"
|
120
|
+
self.tb_writer.add_text(maxcut_str, MAX_CUT)
|
121
|
+
|
122
|
+
# Add output similarity between quantized and float models on representative and validation datasets
|
123
|
+
output_similarity_repr = f"Similarity Metrics on outputs using representative dataset: \n" + "\n".join([f"{key}: {value:.4f}" for key, value in similarity_metrics[OUTPUT_SIMILARITY_METRICS_REPR].items()])
|
124
|
+
output_similarity_val = f"Similarity Metrics on outputs using validation dataset: \n" + "\n".join([f"{key}: {value:.4f}" for key, value in similarity_metrics[OUTPUT_SIMILARITY_METRICS_VAL].items()])
|
125
|
+
self.tb_writer.add_text(output_similarity_repr, OUTPUT_SIMILARITY_METRICS_REPR)
|
126
|
+
self.tb_writer.add_text(output_similarity_val, OUTPUT_SIMILARITY_METRICS_VAL)
|
@@ -25,6 +25,7 @@ from model_compression_toolkit.xquant.keras.model_analyzer import KerasModelAnal
|
|
25
25
|
|
26
26
|
from model_compression_toolkit.xquant.keras.similarity_functions import KerasSimilarityFunctions
|
27
27
|
from model_compression_toolkit.xquant.keras.tensorboard_utils import KerasTensorboardUtils
|
28
|
+
from mct_quantizers.keras.metadata import get_metadata
|
28
29
|
|
29
30
|
|
30
31
|
class KerasReportUtils(FrameworkReportUtils):
|
@@ -57,4 +58,5 @@ class KerasReportUtils(FrameworkReportUtils):
|
|
57
58
|
similarity_calculator,
|
58
59
|
dataset_utils,
|
59
60
|
model_folding,
|
60
|
-
tb_utils
|
61
|
+
tb_utils,
|
62
|
+
get_metadata)
|