mct-nightly 2.2.0.20250113.527__py3-none-any.whl → 2.2.0.20250114.84821__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries. In the hunks below, removed-line content that the registry viewer truncated is marked with `…`.
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/RECORD +103 -105
- model_compression_toolkit/__init__.py +2 -2
- model_compression_toolkit/core/common/framework_info.py +1 -3
- model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
- model_compression_toolkit/core/common/graph/base_graph.py +20 -21
- model_compression_toolkit/core/common/graph/base_node.py +44 -17
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +26 -135
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +667 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +164 -470
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +30 -7
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/pruner.py +5 -3
- model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
- model_compression_toolkit/core/graph_prep_runner.py +12 -11
- model_compression_toolkit/core/keras/data_util.py +24 -5
- model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
- model_compression_toolkit/core/runner.py +33 -60
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/metadata.py +11 -10
- model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
- model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
- model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
- model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
- model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
model_compression_toolkit/core/keras/data_util.py

```diff
@@ -134,11 +134,30 @@ class FixedSampleInfoDataset:
         self.samples = samples
         self.sample_info = sample_info
 
-        # …
-…
-…
-…
-        )
+        # Get the number of tensors in each tuple (corresponds to the number of input layers the model has)
+        num_tensors = len(samples[0])
+
+        # Create separate lists: one for each input layer and separate the tuples into lists
+        sample_tensor_lists = [[] for _ in range(num_tensors)]
+        for s in samples:
+            for i, data_tensor in enumerate(s):
+                sample_tensor_lists[i].append(data_tensor)
+
+        # In order to deal with models that have different input shapes for different layers, we need first to
+        # organize the data in a dictionary in order to use tf.data.Dataset.from_tensor_slices
+        samples_dict = {f'tensor_{i}': tensors for i, tensors in enumerate(sample_tensor_lists)}
+        info_dict = {f'info_{i}': tf.convert_to_tensor(info) for i, info in enumerate(self.sample_info)}
+        combined_dict = {**samples_dict, **info_dict}
+
+        tf_dataset = tf.data.Dataset.from_tensor_slices(combined_dict)
+
+        # Map the dataset to return tuples instead of dict
+        def reorganize_ds_outputs(ds_output):
+            tensors = tuple(ds_output[f'tensor_{i}'] for i in range(num_tensors))
+            infos = tuple(ds_output[f'info_{i}'] for i in range(len(sample_info)))
+            return tensors, infos
+
+        self.tf_dataset = tf_dataset.map(reorganize_ds_outputs)
 
     def __len__(self):
         return len(self.samples)
```
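The rewritten `FixedSampleInfoDataset` stages each model input in its own list, keys the lists in a dictionary so `tf.data.Dataset.from_tensor_slices` can slice inputs with different shapes together, and then maps the dictionary records back to tuples. A standalone sketch of that pattern in plain TensorFlow (the two-input shapes and names below are illustrative, not MCT's API):

```python
import tensorflow as tf

# Four samples, each a tuple of two input tensors with different shapes.
samples = [(tf.zeros((3,)), tf.zeros((2, 2))) for _ in range(4)]

# Stage the data as a dict of per-input tensor lists, mirroring the diff's
# samples_dict, so inputs of different shapes can be sliced together.
as_dict = {f'tensor_{i}': [s[i] for s in samples] for i in range(2)}
ds = tf.data.Dataset.from_tensor_slices(as_dict)

# Map the dict records back to tuples, like reorganize_ds_outputs above.
ds = ds.map(lambda d: (d['tensor_0'], d['tensor_1']))

for x0, x1 in ds.take(1):
    print(x0.shape, x1.shape)  # (3,) (2, 2)
```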
model_compression_toolkit/core/keras/default_framework_info.py

```diff
@@ -26,7 +26,7 @@ else:
 
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
-from …
+from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import SOFTMAX_THRESHOLD
 from model_compression_toolkit.core.keras.constants import SOFTMAX, LINEAR, RELU, SWISH, SIGMOID, IDENTITY, TANH, SELU, \
     KERNEL, DEPTHWISE_KERNEL, GELU
```
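This hunk is the first of many identical ones in this release: `QuantizationMethod` is now imported directly from `mct_quantizers` (the removed import lines are truncated in the rendering). A minimal sketch of the relocated import; the enum members used below are standard `mct_quantizers` values, shown only for illustration:

```python
# Canonical import after this release; the old import path is truncated
# in the hunk above, so it is not reproduced here.
from mct_quantizers import QuantizationMethod

# Typical use: branching quantizer behavior on the method (illustrative).
method = QuantizationMethod.POWER_OF_TWO
assert method != QuantizationMethod.SYMMETRIC
```

The same one-line substitution recurs in the quantizer, exporter, and GPTQ hunks below.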
model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py

```diff
@@ -20,8 +20,7 @@ from model_compression_toolkit.core.common.mixed_precision.configurable_quantize…
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
 from model_compression_toolkit.logger import Logger
-from …
-from mct_quantizers import QuantizationTarget
+from mct_quantizers import QuantizationTarget, QuantizationMethod
 from mct_quantizers import mark_quantizer
 
 import tensorflow as tf
```
model_compression_toolkit/core/keras/resource_utilization_data_facade.py

```diff
@@ -18,18 +18,17 @@ from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, Cor…
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import …
-from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import compute_resource_utilization_data
 from model_compression_toolkit.verify_packages import FOUND_TF
 
 if FOUND_TF:
+    from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2keras import \
+        AttachTpcToKeras
     from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
     from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
     from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
     from tensorflow.keras.models import Model
-    from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
-        AttachTpcToKeras
 
     from model_compression_toolkit import get_target_platform_capabilities
 
@@ -39,7 +38,7 @@ if FOUND_TF:
                                         representative_data_gen: Callable,
                                         core_config: CoreConfig = CoreConfig(
                                             mixed_precision_config=MixedPrecisionQuantizationConfig()),
-                                        target_platform_capabilities: …
+                                        target_platform_capabilities: TargetPlatformCapabilities = KERAS_DEFAULT_TPC
                                         ) -> ResourceUtilization:
         """
         Computes resource utilization data that can be used to calculate the desired target resource utilization
@@ -51,7 +50,7 @@ if FOUND_TF:
             in_model (Model): Keras model to quantize.
             representative_data_gen (Callable): Dataset used for calibration.
             core_config (CoreConfig): CoreConfig containing parameters for quantization and mixed precision of how the model should be quantized.
-            target_platform_capabilities (…
+            target_platform_capabilities (FrameworkQuantizationCapabilities): FrameworkQuantizationCapabilities to optimize the Keras model according to.
 
         Returns:
 
```
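With this change the public facade accepts the schema-level `TargetPlatformCapabilities` and performs the framework attachment internally via `AttachTpcToKeras`. A hedged usage sketch (the toy model and data generator are placeholders; the entry point itself is MCT's documented API, assumed otherwise unchanged):

```python
import numpy as np
import tensorflow as tf
import model_compression_toolkit as mct

model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(16,))])

def representative_data_gen():
    for _ in range(4):
        yield [np.random.randn(8, 16).astype(np.float32)]  # batch size > 1

# Omitting target_platform_capabilities uses KERAS_DEFAULT_TPC, per the new default.
ru = mct.core.keras_resource_utilization_data(model, representative_data_gen)
print(ru)  # weights/activation/total memory and BOPs of the float model
```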
model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py

```diff
@@ -225,7 +225,7 @@ class PytorchModel(torch.nn.Module):
         """
         super(PytorchModel, self).__init__()
         self.graph = copy.deepcopy(graph)
-        delattr(self.graph, '…
+        delattr(self.graph, 'fqc')
 
         self.node_sort = list(topological_sort(self.graph))
         self.node_to_activation_quantization_holder = {}
```
model_compression_toolkit/core/pytorch/default_framework_info.py

```diff
@@ -19,7 +19,7 @@ from torch import sigmoid
 
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
-from …
+from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import SOFTMAX_THRESHOLD
 from model_compression_toolkit.core.pytorch.constants import KERNEL
 from model_compression_toolkit.core.pytorch.quantizer.fake_quant_builder import power_of_two_quantization, \
```
model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py

```diff
@@ -21,7 +21,7 @@ from model_compression_toolkit.core.common.mixed_precision.configurable_quantize…
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
 from model_compression_toolkit.logger import Logger
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 from mct_quantizers import mark_quantizer
 
```
model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py

```diff
@@ -20,7 +20,7 @@ from model_compression_toolkit.core.common.mixed_precision.configurable_quantize…
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
 from model_compression_toolkit.logger import Logger
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 
 from mct_quantizers import mark_quantizer
```
model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py

```diff
@@ -17,8 +17,7 @@ from typing import Callable
 
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import PYTORCH
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import …
-from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import compute_resource_utilization_data
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
@@ -30,7 +29,7 @@ if FOUND_TORCH:
     from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
     from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
     from torch.nn import Module
-    from model_compression_toolkit.target_platform_capabilities.…
+    from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2pytorch import \
         AttachTpcToPytorch
 
     from model_compression_toolkit import get_target_platform_capabilities
@@ -41,7 +40,7 @@ if FOUND_TORCH:
     def pytorch_resource_utilization_data(in_model: Module,
                                           representative_data_gen: Callable,
                                           core_config: CoreConfig = CoreConfig(),
-                                          target_platform_capabilities: …
+                                          target_platform_capabilities: TargetPlatformCapabilities= PYTORCH_DEFAULT_TPC
                                           ) -> ResourceUtilization:
         """
         Computes resource utilization data that can be used to calculate the desired target resource utilization for mixed-precision quantization.
@@ -51,7 +50,7 @@ if FOUND_TORCH:
             in_model (Model): PyTorch model to quantize.
             representative_data_gen (Callable): Dataset used for calibration.
             core_config (CoreConfig): CoreConfig containing parameters for quantization and mixed precision
-            target_platform_capabilities (…
+            target_platform_capabilities (FrameworkQuantizationCapabilities): FrameworkQuantizationCapabilities to optimize the PyTorch model according to.
 
         Returns:
 
```
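The PyTorch facade mirrors the Keras one. Underneath, the relocated `targetplatform2framework` package binds a framework-independent TPC to a concrete framework. A hedged sketch of that two-step flow, using only names that appear in this diff (calling `attach` without `custom_opset2layer` assumes that argument is optional):

```python
from model_compression_toolkit import get_target_platform_capabilities
from model_compression_toolkit.constants import PYTORCH
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2pytorch import \
    AttachTpcToPytorch

tpc = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)  # schema-level TPC
fqc = AttachTpcToPytorch().attach(tpc)  # FrameworkQuantizationCapabilities bound to PyTorch
```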
model_compression_toolkit/core/runner.py

```diff
@@ -12,44 +12,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from collections import namedtuple
 
 import copy
-
-from typing import Callable, Tuple, Any, List, Dict
-
-import numpy as np
+from typing import Callable, Any, List
 
 from model_compression_toolkit.core.common import FrameworkInfo
+from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.fusion.graph_fuser import GraphFuser
-
+from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, \
     SchedulerInfo
 from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
 from model_compression_toolkit.core.common.hessian.hessian_info_service import HessianInfoService
+from model_compression_toolkit.core.common.mixed_precision.bit_width_setter import set_bit_widths
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_candidates_filter import \
     filter_candidates_for_mixed_precision
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
+    ResourceUtilization
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
+    ResourceUtilizationCalculator, TargetInclusionCriterion, BitwidthMode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_data import \
     requires_mixed_precision
-from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
-from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner
-from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.graph.base_graph import Graph
-from model_compression_toolkit.core.common.mixed_precision.bit_width_setter import set_bit_widths
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width
 from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
-from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
-from model_compression_toolkit.core.common.visualization.final_config_visualizer import \
-    WeightsFinalBitwidthConfigVisualizer, \
-    ActivationFinalBitwidthConfigVisualizer
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import TensorboardWriter, \
     finalize_bitwidth_in_tb
+from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
+from model_compression_toolkit.core.quantization_prep_runner import quantization_preparation_runner
+from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
+    FrameworkQuantizationCapabilities
 
 
 def core_runner(in_model: Any,
```
```diff
@@ -57,7 +50,7 @@ def core_runner(in_model: Any,
                 core_config: CoreConfig,
                 fw_info: FrameworkInfo,
                 fw_impl: FrameworkImplementation,
-                …
+                fqc: FrameworkQuantizationCapabilities,
                 target_resource_utilization: ResourceUtilization = None,
                 running_gptq: bool = False,
                 tb_w: TensorboardWriter = None):
@@ -77,7 +70,7 @@ def core_runner(in_model: Any,
         fw_info: Information needed for quantization about the specific framework (e.g., kernel channels indices,
         groups of layers by how they should be quantized, etc.).
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-        …
+        fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
         the attached framework operator's information.
         target_resource_utilization: ResourceUtilization to constraint the search of the mixed-precision configuration for the model.
         tb_w: TensorboardWriter object for logging
@@ -88,7 +81,7 @@ def core_runner(in_model: Any,
     """
 
     # Warn is representative dataset has batch-size == 1
-    batch_data = iter(representative_data_gen())
+    batch_data = next(iter(representative_data_gen()))
     if isinstance(batch_data, list):
         batch_data = batch_data[0]
     if batch_data.shape[0] == 1:
```
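The `batch_data` hunk fixes a genuine bug: `iter(representative_data_gen())` binds the iterator object itself rather than the first batch, so the `shape` check that follows operated on the wrong object. A standalone illustration in plain Python/NumPy (the generator is a stand-in for a representative dataset):

```python
import numpy as np

def representative_data_gen():
    for _ in range(2):
        yield [np.zeros((1, 16))]  # batch size 1

wrong = iter(representative_data_gen())        # a generator object; it has no .shape
batch = next(iter(representative_data_gen()))  # the first yielded batch
print(batch[0].shape[0])  # 1 -> triggers the batch-size warning above
```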
```diff
@@ -96,7 +89,7 @@ def core_runner(in_model: Any,
                        ' consider increasing the batch size')
 
     # Checking whether to run mixed precision quantization
-    if target_resource_utilization is not None:
+    if target_resource_utilization is not None and target_resource_utilization.is_any_restricted():
         if core_config.mixed_precision_config is None:
             Logger.critical("Provided an initialized target_resource_utilization, that means that mixed precision quantization is "
                             "enabled, but the provided MixedPrecisionQuantizationConfig is None.")
@@ -105,7 +98,7 @@ def core_runner(in_model: Any,
                                     target_resource_utilization,
                                     representative_data_gen,
                                     core_config,
-                                    …
+                                    fqc,
                                     fw_info,
                                     fw_impl):
             core_config.mixed_precision_config.set_mixed_precision_enable()
@@ -116,7 +109,7 @@ def core_runner(in_model: Any,
                                      core_config.quantization_config,
                                      fw_info,
                                      fw_impl,
-                                     …
+                                     fqc,
                                      core_config.bit_width_config,
                                      tb_w,
                                      mixed_precision_enable=core_config.is_mixed_precision_enabled,
@@ -138,7 +131,7 @@ def core_runner(in_model: Any,
     if core_config.is_mixed_precision_enabled:
         if core_config.mixed_precision_config.configuration_overwrite is None:
 
-            filter_candidates_for_mixed_precision(graph, target_resource_utilization, fw_info, …
+            filter_candidates_for_mixed_precision(graph, target_resource_utilization, fw_info, fqc)
             bit_widths_config = search_bit_width(tg,
                                                  fw_info,
                                                  fw_impl,
@@ -177,7 +170,6 @@ def core_runner(in_model: Any,
 
     _set_final_resource_utilization(graph=tg,
                                     final_bit_widths_config=bit_widths_config,
-                                    ru_functions_dict=ru_functions_mapping,
                                     fw_info=fw_info,
                                     fw_impl=fw_impl)
 
@@ -215,7 +207,6 @@ def core_runner(in_model: Any,
 
 def _set_final_resource_utilization(graph: Graph,
                                     final_bit_widths_config: List[int],
-                                    ru_functions_dict: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]],
                                     fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation):
     """
```
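Note the new guard in the first hunk above: mixed precision is entered only when at least one resource-utilization target is actually restricted, so a default-constructed `ResourceUtilization` no longer turns the search on. A hedged sketch (the constructor keyword reflects MCT's public `ResourceUtilization` API; `is_any_restricted` comes from this diff):

```python
from model_compression_toolkit.core import ResourceUtilization

unrestricted = ResourceUtilization()                   # no target set
restricted = ResourceUtilization(weights_memory=1024)  # weights budget, in bytes

assert not unrestricted.is_any_restricted()  # core_runner skips mixed precision
assert restricted.is_any_restricted()        # core_runner enables mixed precision
```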
```diff
@@ -225,39 +216,21 @@ def _set_final_resource_utilization(graph: Graph,
     Args:
         graph: Graph to compute the resource utilization for.
         final_bit_widths_config: The final bit-width configuration to quantize the model accordingly.
-        ru_functions_dict: A mapping between a RUTarget and a pair of resource utilization method and resource utilization aggregation functions.
         fw_info: A FrameworkInfo object.
         fw_impl: FrameworkImplementation object with specific framework methods implementation.
 
     """
-    … (12 removed lines truncated in the rendered diff)
-        elif len(final_bit_widths_config) > 0 and len(non_conf_ru) == 0:
-            final_ru_dict[ru_target] = ru_aggr(conf_ru, False)[0]
-        elif len(final_bit_widths_config) == 0 and len(non_conf_ru) > 0:
-            # final_bit_widths_config == 0 ==> no configurable nodes,
-            # thus, ru can be computed from non_conf_ru alone
-            final_ru_dict[ru_target] = ru_aggr(non_conf_ru, False)[0]
-        else:
-            # No relevant nodes have been quantized with affect on the given target - since we only consider
-            # in the model's final size the quantized layers size, this means that the final size for this target
-            # is zero.
-            Logger.warning(f"No relevant quantized layers for the ru target {ru_target} were found, the recorded "
-                           f"final ru for this target would be 0.")
-            final_ru_dict[ru_target] = 0
-
-    final_ru = ResourceUtilization()
-    final_ru.set_resource_utilization_by_target(final_ru_dict)
-    print(final_ru)
+    w_qcs = {n: n.final_weights_quantization_cfg for n in graph.nodes}
+    a_qcs = {n: n.final_activation_quantization_cfg for n in graph.nodes}
+    ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
+    final_ru = ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, BitwidthMode.QCustom,
+                                                          act_qcs=a_qcs, w_qcs=w_qcs)
+
+    for ru_target, ru in final_ru.get_resource_utilization_dict().items():
+        if ru == 0:
+            Logger.warning(f"No relevant quantized layers for the resource utilization target {ru_target} were found, "
+                           f"the recorded final ru for this target would be 0.")
+
+    Logger.info(f'Resource utilization (of quantized targets):\n {str(final_ru)}.')
     graph.user_info.final_resource_utilization = final_ru
     graph.user_info.mixed_precision_cfg = final_bit_widths_config
```
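`_set_final_resource_utilization` now delegates to the new `ResourceUtilizationCalculator` (added in this release as `resource_utilization_calculator.py`, +667 lines above) instead of per-target aggregation functions, and logs the result instead of printing it. The recorded value remains reachable from the returned user info; a hedged end-to-end sketch (the PTQ call follows MCT's public API, and the model and data are placeholders):

```python
import numpy as np
import tensorflow as tf
import model_compression_toolkit as mct

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(8,))])

def representative_data_gen():
    yield [np.random.randn(32, 8).astype(np.float32)]

quantized_model, user_info = mct.ptq.keras_post_training_quantization(
    model, representative_data_gen)

final_ru = user_info.final_resource_utilization   # set by the code above
print(final_ru.get_resource_utilization_dict())   # per-RUTarget values
```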
model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py

```diff
@@ -20,7 +20,7 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config…
     NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig
 
 from model_compression_toolkit.logger import Logger
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
 from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer
```
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py

```diff
@@ -21,7 +21,7 @@ from model_compression_toolkit.constants import THRESHOLD, SIGNED, RANGE_MIN, RA…
 from model_compression_toolkit.core.common.quantization.node_quantization_config import BaseNodeQuantizationConfig, \
     NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig
 from model_compression_toolkit.logger import Logger
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
 from mct_quantizers import \
```
model_compression_toolkit/gptq/keras/quantization_facade.py

```diff
@@ -22,7 +22,9 @@ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR…
     LR_BIAS_DEFAULT, GPTQ_MOMENTUM, REG_DEFAULT_SLA
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW, ACT_HESSIAN_DEFAULT_BATCH_SIZE, GPTQ_HESSIAN_NUM_SAMPLES
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import …
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2keras import \
+    AttachTpcToKeras
 from model_compression_toolkit.verify_packages import FOUND_TF
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GPTQHessianScoresConfig, \
@@ -33,7 +35,6 @@ from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.runner import core_runner
 from model_compression_toolkit.gptq.runner import gptq_runner
 from model_compression_toolkit.core.analyzer import analyzer_model_quantization
-from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
 from model_compression_toolkit.metadata import create_model_metadata
 
 
@@ -48,8 +49,6 @@ if FOUND_TF:
     from model_compression_toolkit.exporter.model_wrapper import get_exportable_keras_model
     from model_compression_toolkit import get_target_platform_capabilities
    from mct_quantizers.keras.metadata import add_metadata
-    from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attach2keras import \
-        AttachTpcToKeras
 
     # As from TF2.9 optimizers package is changed
     if version.parse(tf.__version__) < version.parse("2.9"):
@@ -157,7 +156,7 @@ if FOUND_TF:
                                               gptq_representative_data_gen: Callable = None,
                                               target_resource_utilization: ResourceUtilization = None,
                                               core_config: CoreConfig = CoreConfig(),
-                                              target_platform_capabilities: …
+                                              target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, UserInformation]:
         """
         Quantize a trained Keras model using post-training quantization. The model is quantized using a
         symmetric constraint quantization thresholds (power of two).
@@ -244,7 +243,7 @@ if FOUND_TF:
 
         # Attach tpc model to framework
         attach2keras = AttachTpcToKeras()
-        …
+        framework_platform_capabilities = attach2keras.attach(
             target_platform_capabilities,
             custom_opset2layer=core_config.quantization_config.custom_tpc_opset_to_layer)
 
@@ -253,7 +252,7 @@ if FOUND_TF:
                                                          core_config=core_config,
                                                          fw_info=DEFAULT_KERAS_INFO,
                                                          fw_impl=fw_impl,
-                                                         …
+                                                         fqc=framework_platform_capabilities,
                                                          target_resource_utilization=target_resource_utilization,
                                                          tb_w=tb_w,
                                                          running_gptq=True)
@@ -281,9 +280,9 @@ if FOUND_TF:
                                                   DEFAULT_KERAS_INFO)
 
         exportable_model, user_info = get_exportable_keras_model(tg_gptq)
-        if …
+        if framework_platform_capabilities.tpc.add_metadata:
             exportable_model = add_metadata(exportable_model,
-                                            create_model_metadata(…
+                                            create_model_metadata(fqc=framework_platform_capabilities,
                                                                   scheduling_info=scheduling_info))
         return exportable_model, user_info
 
```
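The export step now reads the `add_metadata` flag off the attached capabilities (`fqc.tpc.add_metadata`) and passes the fqc itself into `create_model_metadata`. A hedged sketch of that gating, isolated into a helper (all names come from the diff; the helper function itself is hypothetical):

```python
from mct_quantizers.keras.metadata import add_metadata
from model_compression_toolkit.metadata import create_model_metadata

def maybe_add_metadata(exportable_model, fqc, scheduling_info):
    # The schema-level TPC hangs off the framework capabilities as fqc.tpc.
    if fqc.tpc.add_metadata:
        exportable_model = add_metadata(
            exportable_model,
            create_model_metadata(fqc=fqc, scheduling_info=scheduling_info))
    return exportable_model
```

The PyTorch GPTQ facade below makes the identical change with `mct_quantizers.pytorch.metadata.add_metadata`.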
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py

```diff
@@ -18,7 +18,7 @@ import numpy as np
 
 from model_compression_toolkit.gptq import RoundingType
 from model_compression_toolkit.core.common import max_power_of_two
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 from model_compression_toolkit.gptq.common.gptq_constants import PTQ_THRESHOLD, SCALE_PTQ, \
     SOFT_ROUNDING_GAMMA, SOFT_ROUNDING_ZETA, AUXVAR
```
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py

```diff
@@ -18,7 +18,7 @@ import numpy as np
 
 from model_compression_toolkit.gptq import RoundingType
 from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 from model_compression_toolkit.gptq.common.gptq_constants import \
     SOFT_ROUNDING_GAMMA, SOFT_ROUNDING_ZETA, AUXVAR
```
model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py

```diff
@@ -19,7 +19,7 @@ import numpy as np
 import tensorflow as tf
 
 from model_compression_toolkit.gptq import RoundingType
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget
 from model_compression_toolkit.gptq.common.gptq_constants import AUXVAR, PTQ_THRESHOLD
 from model_compression_toolkit.gptq.keras.quantizer import quant_utils as qutils
```
model_compression_toolkit/gptq/pytorch/quantization_facade.py

```diff
@@ -31,8 +31,7 @@ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR…
 from model_compression_toolkit.gptq.runner import gptq_runner
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.metadata import create_model_metadata
-from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import …
-from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
+from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities
 from model_compression_toolkit.verify_packages import FOUND_TORCH
 
 
@@ -48,7 +47,7 @@ if FOUND_TORCH:
     from torch.optim import Adam, Optimizer
     from model_compression_toolkit import get_target_platform_capabilities
     from mct_quantizers.pytorch.metadata import add_metadata
-    from model_compression_toolkit.target_platform_capabilities.…
+    from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2pytorch import \
         AttachTpcToPytorch
 
     DEFAULT_PYTORCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
@@ -146,11 +145,11 @@ if FOUND_TORCH:
                                                 core_config: CoreConfig = CoreConfig(),
                                                 gptq_config: GradientPTQConfig = None,
                                                 gptq_representative_data_gen: Callable = None,
-                                                target_platform_capabilities: …
+                                                target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC):
         """
         Quantize a trained Pytorch module using post-training quantization.
         By default, the module is quantized using a symmetric constraint quantization thresholds
-        (power of two) as defined in the default …
+        (power of two) as defined in the default FrameworkQuantizationCapabilities.
         The module is first optimized using several transformations (e.g. BatchNormalization folding to
         preceding layers). Then, using a given dataset, statistics (e.g. min/max, histogram, etc.) are
         being collected for each layer's output (and input, depends on the quantization configuration).
@@ -217,7 +216,7 @@ if FOUND_TORCH:
 
         # Attach tpc model to framework
         attach2pytorch = AttachTpcToPytorch()
-        …
+        framework_quantization_capabilities = attach2pytorch.attach(target_platform_capabilities,
                                                                     core_config.quantization_config.custom_tpc_opset_to_layer)
 
         # ---------------------- #
@@ -228,7 +227,7 @@ if FOUND_TORCH:
                                                          core_config=core_config,
                                                          fw_info=DEFAULT_PYTORCH_INFO,
                                                          fw_impl=fw_impl,
-                                                         …
+                                                         fqc=framework_quantization_capabilities,
                                                          target_resource_utilization=target_resource_utilization,
                                                          tb_w=tb_w,
                                                          running_gptq=True)
@@ -257,9 +256,9 @@ if FOUND_TORCH:
                                                   DEFAULT_PYTORCH_INFO)
 
         exportable_model, user_info = get_exportable_pytorch_model(graph_gptq)
-        if …
+        if framework_quantization_capabilities.tpc.add_metadata:
             exportable_model = add_metadata(exportable_model,
-                                            create_model_metadata(…
+                                            create_model_metadata(fqc=framework_quantization_capabilities,
                                                                   scheduling_info=scheduling_info))
         return exportable_model, user_info
 
```
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py

```diff
@@ -18,7 +18,7 @@ from typing import Dict
 import numpy as np
 
 from model_compression_toolkit.core.common import max_power_of_two
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
 from model_compression_toolkit.gptq.common.gptq_config import RoundingType
 from model_compression_toolkit.gptq.pytorch.quantizer.base_pytorch_gptq_quantizer import \
```
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py

```diff
@@ -18,7 +18,7 @@ from typing import Dict
 import numpy as np
 
 from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
 from model_compression_toolkit.gptq.common.gptq_config import RoundingType
 from model_compression_toolkit.gptq.pytorch.quantizer.base_pytorch_gptq_quantizer import \
```
model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py

```diff
@@ -18,7 +18,7 @@ from typing import Dict
 import numpy as np
 from model_compression_toolkit.defaultdict import DefaultDict
 
-from …
+from mct_quantizers import QuantizationMethod
 from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
 from model_compression_toolkit.gptq.common.gptq_config import RoundingType
 from model_compression_toolkit.gptq.pytorch.quantizer.base_pytorch_gptq_quantizer import \
```