mct-nightly 1.11.0.20240321.357__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/METADATA +17 -9
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/RECORD +152 -152
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +1 -1
- model_compression_toolkit/core/__init__.py +3 -3
- model_compression_toolkit/core/common/collectors/base_collector.py +2 -2
- model_compression_toolkit/core/common/data_loader.py +3 -3
- model_compression_toolkit/core/common/graph/base_graph.py +10 -13
- model_compression_toolkit/core/common/graph/base_node.py +3 -3
- model_compression_toolkit/core/common/graph/edge.py +2 -1
- model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py +2 -4
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +2 -2
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
- model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +1 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +24 -23
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +110 -112
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +114 -0
- model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_data.py → resource_utilization_tools/resource_utilization_data.py} +19 -19
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +105 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +26 -0
- model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py} +61 -61
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +75 -71
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +2 -4
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +34 -34
- model_compression_toolkit/core/common/model_collector.py +2 -2
- model_compression_toolkit/core/common/network_editors/actions.py +3 -3
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +12 -12
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +2 -2
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +2 -2
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +7 -7
- model_compression_toolkit/core/common/pruning/prune_graph.py +2 -3
- model_compression_toolkit/core/common/pruning/pruner.py +7 -7
- model_compression_toolkit/core/common/pruning/pruning_config.py +1 -1
- model_compression_toolkit/core/common/pruning/pruning_info.py +2 -2
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +7 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +4 -2
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +4 -6
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +2 -4
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -1
- model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +8 -6
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +4 -6
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +4 -7
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +3 -3
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +3 -3
- model_compression_toolkit/core/common/user_info.py +1 -1
- model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/keras/back2framework/instance_builder.py +2 -2
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +4 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +2 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +3 -3
- model_compression_toolkit/core/keras/hessian/trace_hessian_calculator_keras.py +1 -2
- model_compression_toolkit/core/keras/hessian/weights_trace_hessian_calculator_keras.py +5 -6
- model_compression_toolkit/core/keras/keras_implementation.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +2 -4
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +1 -1
- model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +7 -7
- model_compression_toolkit/core/keras/reader/common.py +2 -2
- model_compression_toolkit/core/keras/reader/node_builder.py +1 -1
- model_compression_toolkit/core/keras/{kpi_data_facade.py → resource_utilization_data_facade.py} +25 -24
- model_compression_toolkit/core/keras/tf_tensor_numpy.py +4 -2
- model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +6 -11
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +3 -7
- model_compression_toolkit/core/pytorch/hessian/trace_hessian_calculator_pytorch.py +1 -2
- model_compression_toolkit/core/pytorch/hessian/weights_trace_hessian_calculator_pytorch.py +2 -2
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +3 -3
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +1 -1
- model_compression_toolkit/core/pytorch/reader/graph_builders.py +5 -7
- model_compression_toolkit/core/pytorch/reader/reader.py +2 -2
- model_compression_toolkit/core/pytorch/{kpi_data_facade.py → resource_utilization_data_facade.py} +24 -22
- model_compression_toolkit/core/pytorch/utils.py +3 -2
- model_compression_toolkit/core/runner.py +43 -42
- model_compression_toolkit/data_generation/common/data_generation.py +18 -18
- model_compression_toolkit/data_generation/common/model_info_exctractors.py +1 -1
- model_compression_toolkit/data_generation/keras/keras_data_generation.py +7 -10
- model_compression_toolkit/data_generation/keras/model_info_exctractors.py +2 -1
- model_compression_toolkit/data_generation/keras/optimization_functions/image_initilization.py +2 -1
- model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +2 -4
- model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +2 -1
- model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +8 -11
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +1 -1
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +2 -3
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +2 -3
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +8 -4
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +2 -2
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +7 -8
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +19 -12
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +2 -2
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +10 -11
- model_compression_toolkit/gptq/common/gptq_graph.py +3 -3
- model_compression_toolkit/gptq/common/gptq_training.py +14 -12
- model_compression_toolkit/gptq/keras/gptq_training.py +10 -8
- model_compression_toolkit/gptq/keras/graph_info.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +15 -17
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +4 -5
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/gptq/pytorch/gptq_training.py +10 -8
- model_compression_toolkit/gptq/pytorch/graph_info.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +11 -13
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +3 -4
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/logger.py +1 -13
- model_compression_toolkit/pruning/keras/pruning_facade.py +11 -12
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +11 -12
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -14
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -8
- model_compression_toolkit/qat/keras/quantization_facade.py +20 -22
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +2 -3
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +12 -14
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +2 -3
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +1 -1
- model_compression_toolkit/target_platform_capabilities/immutable.py +4 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +4 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +1 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +43 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +13 -18
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py +2 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py +2 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +5 -5
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +13 -13
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +14 -7
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +5 -5
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -3
- model_compression_toolkit/trainable_infrastructure/keras/load_model.py +4 -5
- model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +3 -4
- model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py +0 -112
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py +0 -26
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/WHEEL +0 -0
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/common/mixed_precision/{kpi_tools → resource_utilization_tools}/__init__.py +0 -0
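The bulk of this release is a rename of the mixed-precision "KPI" machinery to "resource utilization": `kpi_tools` becomes `resource_utilization_tools`, `kpi_data.py` becomes `resource_utilization_data.py`, and the facades below gain a `target_resource_utilization` parameter. A minimal sketch of code written against the new paths, assembled from the import lines and docstring examples in the hunks below (the `weights_memory` value is illustrative):

```python
# New module path introduced by this version (the old kpi_tools modules are removed):
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
    ResourceUtilization

# The docstring examples below reach the same class through the public alias:
import model_compression_toolkit as mct

ru = mct.core.ResourceUtilization(weights_memory=1_000_000)  # illustrative target, in bytes
```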
model_compression_toolkit/pruning/keras/pruning_facade.py:

@@ -17,7 +17,7 @@ from typing import Callable, Tuple
 
 from model_compression_toolkit import get_target_platform_capabilities
 from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF
-from model_compression_toolkit.core.common.mixed_precision.
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core.common.pruning.pruner import Pruner
 from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
 from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
@@ -37,13 +37,13 @@ if FOUND_TF:
     DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)
 
     def keras_pruning_experimental(model: Model,
-
+                                   target_resource_utilization: ResourceUtilization,
                                    representative_data_gen: Callable,
                                    pruning_config: PruningConfig = PruningConfig(),
                                    target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, PruningInfo]:
         """
-        Perform structured pruning on a Keras model to meet a specified target
-        This function prunes the provided model according to the target
+        Perform structured pruning on a Keras model to meet a specified target resource utilization.
+        This function prunes the provided model according to the target resource utilization by grouping and pruning
         channels based on each layer's SIMD configuration in the Target Platform Capabilities (TPC).
         By default, the importance of each channel group is determined using the Label-Free Hessian
         (LFH) method, assessing each channel's sensitivity to the Hessian of the loss function.
@@ -55,7 +55,7 @@ if FOUND_TF:
 
         Args:
             model (Model): The original Keras model to be pruned.
-
+            target_resource_utilization (ResourceUtilization): The target Key Performance Indicators to be achieved through pruning.
            representative_data_gen (Callable): A function to generate representative data for pruning analysis.
            pruning_config (PruningConfig): Configuration settings for the pruning process. Defaults to standard config.
            target_platform_capabilities (TargetPlatformCapabilities): Platform-specific constraints and capabilities. Defaults to DEFAULT_KERAS_TPC.
@@ -82,12 +82,12 @@ if FOUND_TF:
         >>> import numpy as np
         >>> def repr_datagen(): yield [np.random.random((1, 224, 224, 3))]
 
-        Define a target
+        Define a target resource utilization for pruning.
         Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights
         are represented in float32 data type (thus, each parameter is represented using 4 bytes):
 
         >>> dense_nparams = sum([l.count_params() for l in model.layers])
-        >>>
+        >>> target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_nparams * 4 * 0.5)
 
         Optionally, define a pruning configuration. num_score_approximations can be passed
         to configure the number of importance scores that will be calculated for each channel.
@@ -98,7 +98,7 @@ if FOUND_TF:
 
         Perform pruning:
 
-        >>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model,
+        >>> pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config)
 
         """
 
@@ -126,7 +126,7 @@ if FOUND_TF:
         pruner = Pruner(float_graph_with_compression_config,
                         DEFAULT_KERAS_INFO,
                         fw_impl,
-
+                        target_resource_utilization,
                         representative_data_gen,
                         pruning_config,
                         target_platform_capabilities)
@@ -148,6 +148,5 @@ else:
     # If tensorflow is not installed,
     # we raise an exception when trying to use these functions.
     def keras_pruning_experimental(*args, **kwargs):
-        Logger.critical(
-        '
-        'Could not find Tensorflow package.') # pragma: no cover
+        Logger.critical("Tensorflow must be installed to use keras_pruning_experimental. "
+                        "The 'tensorflow' package is missing.") # pragma: no cover
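Assembled from the docstring fragments in the hunks above, a complete call of the renamed Keras pruning facade might look like this sketch; the MobileNet model is an assumption for illustration, while the data generator, the weights-memory arithmetic, and the facade call follow the docstring:

```python
import numpy as np
import model_compression_toolkit as mct
from tensorflow.keras.applications.mobilenet import MobileNet  # assumed example model

model = MobileNet()

def repr_datagen():
    # Representative data for the pruning analysis (shape from the docstring example).
    yield [np.random.random((1, 224, 224, 3))]

# Halve the weights memory, assuming float32 weights (4 bytes per parameter).
dense_nparams = sum([l.count_params() for l in model.layers])
target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_nparams * 4 * 0.5)

pruned_model, pruning_info = mct.pruning.keras_pruning_experimental(
    model=model,
    target_resource_utilization=target_resource_utilization,
    representative_data_gen=repr_datagen)
```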
model_compression_toolkit/pruning/pytorch/pruning_facade.py:

@@ -16,7 +16,7 @@
 from typing import Callable, Tuple
 from model_compression_toolkit import get_target_platform_capabilities
 from model_compression_toolkit.constants import FOUND_TORCH, PYTORCH
-from model_compression_toolkit.core.common.mixed_precision.
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core.common.pruning.pruner import Pruner
 from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
 from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
@@ -41,14 +41,14 @@ if FOUND_TORCH:
     DEFAULT_PYOTRCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
 
     def pytorch_pruning_experimental(model: Module,
-
+                                     target_resource_utilization: ResourceUtilization,
                                      representative_data_gen: Callable,
                                      pruning_config: PruningConfig = PruningConfig(),
                                      target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYOTRCH_TPC) -> \
             Tuple[Module, PruningInfo]:
         """
-        Perform structured pruning on a Pytorch model to meet a specified target
-        This function prunes the provided model according to the target
+        Perform structured pruning on a Pytorch model to meet a specified target resource utilization.
+        This function prunes the provided model according to the target resource utilization by grouping and pruning
         channels based on each layer's SIMD configuration in the Target Platform Capabilities (TPC).
         By default, the importance of each channel group is determined using the Label-Free Hessian
         (LFH) method, assessing each channel's sensitivity to the Hessian of the loss function.
@@ -60,7 +60,7 @@ if FOUND_TORCH:
 
         Args:
             model (Module): The PyTorch model to be pruned.
-
+            target_resource_utilization (ResourceUtilization): Key Performance Indicators specifying the pruning targets.
             representative_data_gen (Callable): A function to generate representative data for pruning analysis.
             pruning_config (PruningConfig): Configuration settings for the pruning process. Defaults to standard config.
             target_platform_capabilities (TargetPlatformCapabilities): Platform-specific constraints and capabilities.
@@ -88,12 +88,12 @@ if FOUND_TORCH:
         >>> import numpy as np
         >>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
 
-        Define a target
+        Define a target resource utilization for pruning.
         Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights
         are represented in float32 data type (thus, each parameter is represented using 4 bytes):
 
         >>> dense_nparams = sum(p.numel() for p in model.state_dict().values())
-        >>>
+        >>> target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_nparams * 4 * 0.5)
 
         Optionally, define a pruning configuration. num_score_approximations can be passed
         to configure the number of importance scores that will be calculated for each channel.
@@ -104,7 +104,7 @@ if FOUND_TORCH:
 
         Perform pruning:
 
-        >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model,
+        >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_resource_utilization=target_resource_utilization, representative_data_gen=repr_datagen, pruning_config=pruning_config)
 
         """
 
@@ -132,7 +132,7 @@ if FOUND_TORCH:
         pruner = Pruner(float_graph_with_compression_config,
                         DEFAULT_PYTORCH_INFO,
                         fw_impl,
-
+                        target_resource_utilization,
                         representative_data_gen,
                         pruning_config,
                         target_platform_capabilities)
@@ -165,6 +165,5 @@ else:
         Raises:
             CriticalError: Indicates that PyTorch must be installed to use this function.
         """
-        Logger.critical(
-        '
-        'Could not find the torch package.') # pragma: no cover
+        Logger.critical("PyTorch must be installed to use 'pytorch_pruning_experimental'. "
+                        "The 'torch' package is missing.") # pragma: no cover
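The PyTorch facade mirrors the Keras one. A sketch under the same assumptions (mobilenet_v2 from torchvision is illustrative; the parameter count and the call follow the docstring above):

```python
import numpy as np
import model_compression_toolkit as mct
from torchvision.models import mobilenet_v2  # assumed example model

model = mobilenet_v2()

def repr_datagen():
    # NCHW input, per the docstring example.
    yield [np.random.random((1, 3, 224, 224))]

# Halve the weights memory, assuming float32 weights (4 bytes per parameter).
dense_nparams = sum(p.numel() for p in model.state_dict().values())
target_resource_utilization = mct.core.ResourceUtilization(weights_memory=dense_nparams * 4 * 0.5)

pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(
    model=model,
    target_resource_utilization=target_resource_utilization,
    representative_data_gen=repr_datagen)
```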
model_compression_toolkit/ptq/keras/quantization_facade.py:

@@ -20,7 +20,7 @@ from model_compression_toolkit.core.analyzer import analyzer_model_quantization
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF
-from model_compression_toolkit.core.common.mixed_precision.
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
     MixedPrecisionQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
@@ -42,7 +42,7 @@ if FOUND_TF:
 
     def keras_post_training_quantization(in_model: Model,
                                          representative_data_gen: Callable,
-
+                                         target_resource_utilization: ResourceUtilization = None,
                                          core_config: CoreConfig = CoreConfig(),
                                          target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC):
         """
@@ -55,13 +55,13 @@ if FOUND_TF:
         statistics. Then, if given a mixed precision config in the core_config, using an ILP solver we find
         a mixed-precision configuration, and set a bit-width for each layer. The model is then quantized
         (both coefficients and activations by default).
-        In order to limit the maximal model's size, a target
+        In order to limit the maximal model's size, a target ResourceUtilization need to be passed after weights_memory
         is set (in bytes).
 
         Args:
             in_model (Model): Keras model to quantize.
             representative_data_gen (Callable): Dataset used for calibration.
-
+            target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
             target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to.
 
@@ -99,17 +99,17 @@ if FOUND_TF:
 
         >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1))
 
-        For mixed-precision set a target
-        Create a
+        For mixed-precision set a target ResourceUtilization object:
+        Create a ResourceUtilization object to limit our returned model's size. Note that this value affects only coefficients
         that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value,
         while the bias will not):
 
-        >>>
+        >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits.
 
-        Pass the model, the representative dataset generator, the configuration and the target
+        Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a
         quantized model:
 
-        >>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model, repr_datagen,
+        >>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model, repr_datagen, ru, core_config=config)
 
         For more configuration options, please take a look at our `API documentation <https://sony.github.io/model_optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.
 
@@ -122,7 +122,7 @@ if FOUND_TF:
 
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
-                Logger.
+                Logger.critical("Given quantization config to mixed-precision facade is not of type "
                                 "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization "
                                 "API, or pass a valid mixed precision configuration.") # pragma: no cover
 
@@ -137,7 +137,7 @@ if FOUND_TF:
                                               fw_info=fw_info,
                                               fw_impl=fw_impl,
                                               tpc=target_platform_capabilities,
-
+                                              target_resource_utilization=target_resource_utilization,
                                               tb_w=tb_w)
 
         tg = ptq_runner(tg, representative_data_gen, core_config, fw_info, fw_impl, tb_w)
@@ -156,6 +156,5 @@ else:
     # If tensorflow is not installed,
     # we raise an exception when trying to use these functions.
     def keras_post_training_quantization(*args, **kwargs):
-        Logger.critical(
-        '
-        'Could not find Tensorflow package.') # pragma: no cover
+        Logger.critical("Tensorflow must be installed to use keras_post_training_quantization. "
+                        "The 'tensorflow' package is missing.") # pragma: no cover
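Putting the docstring pieces together, mixed-precision PTQ with the renamed argument might be driven like this sketch (the model is an assumed example; the config, `ru`, and the call are taken from the docstring):

```python
import numpy as np
import model_compression_toolkit as mct
from tensorflow.keras.applications.mobilenet import MobileNet  # assumed example model

model = MobileNet()

def repr_datagen():
    yield [np.random.random((1, 224, 224, 3))]

# Mixed-precision search over one calibration image.
config = mct.core.CoreConfig(
    mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1))

# About 0.75 of the model size when weights are quantized with 8 bits.
ru = mct.core.ResourceUtilization(model.count_params() * 0.75)

quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(
    model, repr_datagen, ru, core_config=config)
```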
model_compression_toolkit/ptq/pytorch/quantization_facade.py:

@@ -19,7 +19,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import PYTORCH, FOUND_TORCH
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
-from model_compression_toolkit.core.common.mixed_precision.
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
     MixedPrecisionQuantizationConfig
@@ -41,7 +41,7 @@ if FOUND_TORCH:
 
     def pytorch_post_training_quantization(in_module: Module,
                                            representative_data_gen: Callable,
-
+                                           target_resource_utilization: ResourceUtilization = None,
                                            core_config: CoreConfig = CoreConfig(),
                                            target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC):
         """
@@ -60,7 +60,7 @@ if FOUND_TORCH:
         Args:
             in_module (Module): Pytorch module to quantize.
             representative_data_gen (Callable): Dataset used for calibration.
-
+            target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
             target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to.
 
@@ -93,7 +93,7 @@ if FOUND_TORCH:
 
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
-                Logger.
+                Logger.critical("Given quantization config to mixed-precision facade is not of type "
                                 "MixedPrecisionQuantizationConfig. Please use "
                                 "pytorch_post_training_quantization API, or pass a valid mixed precision "
                                 "configuration.") # pragma: no cover
@@ -109,7 +109,7 @@ if FOUND_TORCH:
                                               fw_info=DEFAULT_PYTORCH_INFO,
                                               fw_impl=fw_impl,
                                               tpc=target_platform_capabilities,
-
+                                              target_resource_utilization=target_resource_utilization,
                                               tb_w=tb_w)
 
         tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_PYTORCH_INFO, fw_impl, tb_w)
@@ -128,6 +128,5 @@ else:
     # If torch is not installed,
     # we raise an exception when trying to use these functions.
     def pytorch_post_training_quantization(*args, **kwargs):
-        Logger.critical(
-        '
-        'Could not find the torch package.') # pragma: no cover
+        Logger.critical("PyTorch must be installed to use 'pytorch_post_training_quantization_experimental'. "
+                        "The 'torch' package is missing.") # pragma: no cover
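No full docstring example survives in these hunks, but the new signature implies usage along these lines (the model choice and the byte budget are illustrative assumptions):

```python
import numpy as np
import model_compression_toolkit as mct
from torchvision.models import mobilenet_v2  # assumed example model

model = mobilenet_v2()

def repr_datagen():
    yield [np.random.random((1, 3, 224, 224))]

quantized_model, quantization_info = mct.ptq.pytorch_post_training_quantization(
    model,
    repr_datagen,
    target_resource_utilization=mct.core.ResourceUtilization(weights_memory=2_000_000),  # illustrative, in bytes
    core_config=mct.core.CoreConfig())
```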
model_compression_toolkit/qat/keras/quantization_facade.py:

@@ -20,7 +20,7 @@ from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import FOUND_TF
-from model_compression_toolkit.core.common.mixed_precision.
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
     MixedPrecisionQuantizationConfig
 from mct_quantizers import KerasActivationQuantizationHolder
@@ -87,7 +87,7 @@ if FOUND_TF:
 
     def keras_quantization_aware_training_init_experimental(in_model: Model,
                                                             representative_data_gen: Callable,
-
+                                                            target_resource_utilization: ResourceUtilization = None,
                                                             core_config: CoreConfig = CoreConfig(),
                                                             qat_config: QATConfig = QATConfig(),
                                                             target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC):
@@ -103,13 +103,13 @@ if FOUND_TF:
         a mixed-precision configuration, and set a bit-width for each layer. The model is built with fake_quant
         nodes for quantizing activation. Weights are kept as float and are quantized online while training by the
         quantization wrapper's weight quantizer.
-        In order to limit the maximal model's size, a target
+        In order to limit the maximal model's size, a target resource utilization need to be passed after weights_memory
         is set (in bytes).
 
         Args:
             in_model (Model): Keras model to quantize.
             representative_data_gen (Callable): Dataset used for initial calibration.
-
+            target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
             qat_config (QATConfig): QAT configuration
             target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to.
@@ -149,17 +149,17 @@ if FOUND_TF:
 
         >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig())
 
-        For mixed-precision set a target
-        Create a
+        For mixed-precision set a target ResourceUtilization object:
+        Create a ResourceUtilization object to limit our returned model's size. Note that this value affects only coefficients
         that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value,
         while the bias will not):
 
-        >>>
+        >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits.
 
-        Pass the model, the representative dataset generator, the configuration and the target
+        Pass the model, the representative dataset generator, the configuration and the target Resource Utilization to get a
         quantized model:
 
-        >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen,
+        >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, ru, core_config=config)
 
         Use the quantized model for fine-tuning. For loading the model from file, use the custom_objects dictionary:
 
@@ -178,7 +178,7 @@ if FOUND_TF:
 
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
-                Logger.
+                Logger.critical("Given quantization config to mixed-precision facade is not of type "
                                 "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization API,"
                                 "or pass a valid mixed precision configuration.")
 
@@ -193,7 +193,7 @@ if FOUND_TF:
                                               fw_info=DEFAULT_KERAS_INFO,
                                               fw_impl=fw_impl,
                                               tpc=target_platform_capabilities,
-
+                                              target_resource_utilization=target_resource_utilization,
                                               tb_w=tb_w)
 
         tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_KERAS_INFO, fw_impl, tb_w)
@@ -245,17 +245,17 @@ if FOUND_TF:
 
         >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig())
 
-        For mixed-precision set a target
-        Create a
+        For mixed-precision set a target ResourceUtilization object:
+        Create a ResourceUtilization object to limit our returned model's size. Note that this value affects only coefficients
         that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value,
         while the bias will not):
 
-        >>>
+        >>> ru = mct.core.ResourceUtilization(model.count_params() * 0.75) # About 0.75 of the model size when quantized with 8 bits.
 
-        Pass the model, the representative dataset generator, the configuration and the target
+        Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a
         quantized model:
 
-        >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen,
+        >>> quantized_model, quantization_info, custom_objects = mct.qat.keras_quantization_aware_training_init_experimental(model, repr_datagen, ru, core_config=config)
 
         Use the quantized model for fine-tuning. For loading the model from file, use the custom_objects dictionary:
 
@@ -291,12 +291,10 @@ else:
     # If tensorflow is not installed,
     # we raise an exception when trying to use these functions.
     def keras_quantization_aware_training_init_experimental(*args, **kwargs):
-        Logger.critical(
-        '
-        'Could not find Tensorflow package.') # pragma: no cover
+        Logger.critical("Tensorflow must be installed to use keras_quantization_aware_training_init_experimental. "
+                        "The 'tensorflow' package is missing.") # pragma: no cover
 
 
     def keras_quantization_aware_training_finalize_experimental(*args, **kwargs):
-        Logger.critical(
-        '
-        'Could not find Tensorflow package.') # pragma: no cover
+        Logger.critical("Tensorflow must be installed to use keras_quantization_aware_training_finalize_experimental. "
+                        "The 'tensorflow' package is missing.") # pragma: no cover
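The QAT init facade takes the same optional target. A sketch assembled from the docstring above (the model choice is assumed; the config, `ru`, and the three-value return follow the docstring):

```python
import numpy as np
import model_compression_toolkit as mct
from tensorflow.keras.applications.mobilenet import MobileNet  # assumed example model

model = MobileNet()

def repr_datagen():
    yield [np.random.random((1, 224, 224, 3))]

config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig())
ru = mct.core.ResourceUtilization(model.count_params() * 0.75)  # About 0.75 of the 8-bit model size.

quantized_model, quantization_info, custom_objects = \
    mct.qat.keras_quantization_aware_training_init_experimental(
        model, repr_datagen, ru, core_config=config)
```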
model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py:

@@ -44,6 +44,5 @@ else:
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
 
             super().__init__(quantization_config)
-            Logger.critical(
-            '
-            'Could not find Tensorflow package.') # pragma: no cover
+            Logger.critical("Tensorflow must be installed to use BaseKerasQATTrainableQuantizer. "
+                            "The 'tensorflow' package is missing.") # pragma: no cover
model_compression_toolkit/qat/keras/quantizer/quantization_builder.py:

@@ -49,7 +49,7 @@ def get_activation_quantizer_holder(n: common.BaseNode,
     # quantization, which in this case has an empty list).
     if len(activation_quantizers) == 1:
         return KerasActivationQuantizationHolder(activation_quantizers[0])
-    Logger.
+    Logger.critical(f'KerasActivationQuantizationHolder supports a single quantizer but {len(activation_quantizers)} quantizers were found for node {n}.')
 
 
 def quantization_builder(n: common.BaseNode,
model_compression_toolkit/qat/pytorch/quantization_facade.py:

@@ -23,7 +23,7 @@ from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
-from model_compression_toolkit.core.common.mixed_precision.
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
     MixedPrecisionQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \
@@ -75,7 +75,7 @@ if FOUND_TORCH:
 
     def pytorch_quantization_aware_training_init_experimental(in_model: Module,
                                                               representative_data_gen: Callable,
-
+                                                              target_resource_utilization: ResourceUtilization = None,
                                                               core_config: CoreConfig = CoreConfig(),
                                                               qat_config: QATConfig = QATConfig(),
                                                               target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC):
@@ -91,13 +91,13 @@ if FOUND_TORCH:
         a mixed-precision configuration, and set a bit-width for each layer. The model is built with fake_quant
         nodes for quantizing activation. Weights are kept as float and are quantized online while training by the
         quantization wrapper's weight quantizer.
-        In order to limit the maximal model's size, a target
+        In order to limit the maximal model's size, a target resource utilization need to be passed after weights_memory
         is set (in bytes).
 
         Args:
             in_model (Model): Pytorch model to quantize.
             representative_data_gen (Callable): Dataset used for initial calibration.
-
+            target_resource_utilization (ResourceUtilization): ResourceUtilization object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
             qat_config (QATConfig): QAT configuration
             target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Pytorch model according to.
@@ -131,7 +131,7 @@ if FOUND_TORCH:
 
         >>> config = mct.core.CoreConfig()
 
-        Pass the model, the representative dataset generator, the configuration and the target
+        Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a
         quantized model. Now the model contains quantizer wrappers for fine tunning the weights:
 
         >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
@@ -146,7 +146,7 @@ if FOUND_TORCH:
 
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
-                Logger.
+                Logger.critical("Given quantization config to mixed-precision facade is not of type "
                                 "MixedPrecisionQuantizationConfig. Please use pytorch_post_training_quantization API,"
                                 "or pass a valid mixed precision configuration.")
 
@@ -160,7 +160,7 @@ if FOUND_TORCH:
                                               fw_info=DEFAULT_PYTORCH_INFO,
                                               fw_impl=fw_impl,
                                               tpc=target_platform_capabilities,
-
+                                              target_resource_utilization=target_resource_utilization,
                                               tb_w=tb_w)
 
         tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_PYTORCH_INFO, fw_impl, tb_w)
@@ -213,7 +213,7 @@ if FOUND_TORCH:
 
         >>> config = mct.core.CoreConfig()
 
-        Pass the model, the representative dataset generator, the configuration and the target
+        Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a
         quantized model:
 
         >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
@@ -239,12 +239,10 @@ else:
     # If torch is not installed,
     # we raise an exception when trying to use these functions.
     def pytorch_quantization_aware_training_init_experimental(*args, **kwargs):
-        Logger.critical('
-        '
-        'Could not find the torch package.') # pragma: no cover
+        Logger.critical('PyTorch must be installed to use pytorch_quantization_aware_training_init_experimental. '
+                        "The 'torch' package is missing.") # pragma: no cover
 
 
     def pytorch_quantization_aware_training_finalize_experimental(*args, **kwargs):
-        Logger.critical(
-        '
-        'Could not find the torch package.') # pragma: no cover
+        Logger.critical("PyTorch must be installed to use 'pytorch_quantization_aware_training_finalize_experimental'. "
+                        "The 'torch' package is missing.") # pragma: no cover
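And the PyTorch QAT init, following its docstring (the model choice is assumed; `target_resource_utilization` stays at its default here, as in the docstring example):

```python
import numpy as np
import model_compression_toolkit as mct
from torchvision.models import mobilenet_v2  # assumed example model

model = mobilenet_v2()

def repr_datagen():
    yield [np.random.random((1, 3, 224, 224))]

config = mct.core.CoreConfig()

quantized_model, quantization_info = \
    mct.qat.pytorch_quantization_aware_training_init_experimental(
        model, repr_datagen, core_config=config)
```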
model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py:

@@ -44,6 +44,5 @@ else:
         def __init__(self,
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
             super().__init__(quantization_config)
-            Logger.critical(
-            '
-            'Could not find torch package.') # pragma: no cover
+            Logger.critical("Installation of PyTorch is required to use BasePytorchQATTrainableQuantizer. "
+                            "The 'torch' package was not found.") # pragma: no cover
model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py:

@@ -49,7 +49,7 @@ def get_activation_quantizer_holder(n: common.BaseNode,
     # quantization, which in this case has an empty list).
     if len(activation_quantizers) == 1:
         return PytorchActivationQuantizationHolder(activation_quantizers[0])
-    Logger.
+    Logger.critical(f'ActivationQuantizationHolder supports only a single quantizer, but ({len(activation_quantizers)}) quantizers were found for node {n}.')
 
 
 def quantization_builder(n: common.BaseNode,
model_compression_toolkit/target_platform_capabilities/immutable.py:

@@ -14,6 +14,8 @@
 # ==============================================================================
 from typing import Any
 
+from model_compression_toolkit.logger import Logger
+
 
 class ImmutableClass(object):
     """
@@ -36,7 +38,7 @@ class ImmutableClass(object):
 
         """
         if self._initialized:
-
+            Logger.critical("Immutable class. Can't edit attributes.")
         else:
             object.__setattr__(self,
                                *args,
@@ -49,5 +51,5 @@ class ImmutableClass(object):
 
         """
         if self._initialized:
-
+            Logger.critical('Object reinitialization error: object cannot be finalized again.') # Can not get finalized again.
         self._initialized = True # Finalize object.
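The contract these hunks adjust, reduced to a self-contained sketch: only the `_initialized` flag and the two error texts come from the hunks; the class shape is illustrative, and `raise` stands in for `Logger.critical`:

```python
class ImmutableSketch:
    """Allows attribute writes only until the object is finalized."""

    def __init__(self):
        self._initialized = False  # permitted: not finalized yet

    def __setattr__(self, name, value):
        # Once finalized, every further write is rejected.
        if getattr(self, '_initialized', False):
            raise AttributeError("Immutable class. Can't edit attributes.")
        object.__setattr__(self, name, value)

    def finalize(self):  # hypothetical name; the real method name is not visible in this diff
        if self._initialized:
            raise RuntimeError('Object reinitialization error: object cannot be finalized again.')
        object.__setattr__(self, '_initialized', True)
```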
model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py:

@@ -14,14 +14,10 @@
 # ==============================================================================
 
 from model_compression_toolkit.target_platform_capabilities.target_platform.fusing import Fusing
-from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import
-
-
-from model_compression_toolkit.target_platform_capabilities.target_platform.
-    get_default_quantization_config_options, TargetPlatformModel
-
-from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, \
-    QuantizationConfigOptions, AttributeQuantizationConfig
+from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import AttributeFilter
+from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities, OperationsSetToLayers, Smaller, SmallerEq, NotEq, Eq, GreaterEq, Greater, LayerFilterParams, OperationsToLayers, get_current_tpc
+from model_compression_toolkit.target_platform_capabilities.target_platform.target_platform_model import get_default_quantization_config_options, TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import OpQuantizationConfig, QuantizationConfigOptions, AttributeQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.operators import OperatorsSet, OperatorSetConcat
 
 from mct_quantizers import QuantizationMethod
model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py:

@@ -40,7 +40,7 @@ class CurrentTPModel:
 
         """
         if self.tp_model is None:
-            Logger.
+            Logger.critical('Target platform model is not initialized.') # pragma: no cover
         return self.tp_model
 
     def reset(self):