mct-nightly 2.0.0.20240417.406__py3-none-any.whl → 2.0.0.20240419.358__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/METADATA +1 -1
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/RECORD +60 -57
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +2 -0
- model_compression_toolkit/core/common/graph/base_graph.py +2 -2
- model_compression_toolkit/core/common/graph/base_node.py +26 -9
- model_compression_toolkit/core/common/graph/functional_node.py +18 -1
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
- model_compression_toolkit/core/common/hessian/trace_hessian_request.py +1 -3
- model_compression_toolkit/core/common/network_editors/node_filters.py +4 -3
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +0 -5
- model_compression_toolkit/core/common/quantization/quantization_config.py +5 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +67 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +12 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +14 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +30 -3
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +17 -7
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +14 -3
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +13 -3
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +16 -3
- model_compression_toolkit/core/common/similarity_analyzer.py +16 -4
- model_compression_toolkit/core/common/substitutions/remove_identity.py +48 -0
- model_compression_toolkit/core/graph_prep_runner.py +10 -4
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +4 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -7
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py +51 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/keras/keras_implementation.py +13 -11
- model_compression_toolkit/core/keras/keras_node_prior_info.py +4 -4
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +4 -5
- model_compression_toolkit/core/keras/reader/common.py +2 -2
- model_compression_toolkit/core/keras/reader/node_builder.py +28 -9
- model_compression_toolkit/core/keras/tf_tensor_numpy.py +5 -2
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +34 -21
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_folding.py +8 -8
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +4 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py +50 -0
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +8 -8
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +7 -6
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +2 -2
- model_compression_toolkit/core/quantization_prep_runner.py +6 -2
- model_compression_toolkit/core/runner.py +5 -2
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +5 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +9 -2
- model_compression_toolkit/gptq/keras/quantization_facade.py +2 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +3 -1
- model_compression_toolkit/gptq/runner.py +1 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +5 -5
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +1 -1
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +20 -6
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +1 -1
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +22 -8
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +1 -1
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/WHEEL +0 -0
- {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py
ADDED

@@ -0,0 +1,50 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import torch
+
+from model_compression_toolkit.core.common.substitutions.remove_identity import remove_identity_node
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+
+
+class RemoveIdentity(common.BaseSubstitution):
+    """
+    Remove `torch.nn.Identity` layers from the graph.
+    """
+
+    def __init__(self):
+        nodes = NodeOperationMatcher(torch.nn.Identity)
+        super().__init__(matcher_instance=nodes)
+
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        The method to perform the substitution of the `torch.nn.Identity` node by
+        reconnecting its input directly to its output, effectively removing the node
+        from the graph.
+
+        Args:
+            graph: The current graph of operations where the node resides.
+            node: The specific `BaseNode` that is matched to be an Identity operation.
+
+        Returns:
+            Graph: The updated graph after removing the identity node.
+        """
+        return remove_identity_node(graph, node)
+
+
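Note: remove_identity_node itself lives in the new common substitution module (model_compression_toolkit/core/common/substitutions/remove_identity.py, +48 in the file list above) and is not shown in this diff. As an illustration of the same rewiring idea (not MCT's implementation), here is a minimal torch.fx sketch that drops nn.Identity nodes by reconnecting each one's consumers to its input:

import torch
import torch.fx as fx

class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.identity = torch.nn.Identity()
        self.linear = torch.nn.Linear(4, 2)

    def forward(self, x):
        return self.linear(self.identity(x))

m = Toy()
gm = fx.symbolic_trace(m)
for node in list(gm.graph.nodes):
    # An Identity module is a pure pass-through: rewire its users to its
    # single input, then erase the node from the graph.
    if node.op == 'call_module' and isinstance(gm.get_submodule(node.target), torch.nn.Identity):
        node.replace_all_uses_with(node.args[0])
        gm.graph.erase_node(node)
gm.recompile()

x = torch.randn(1, 4)
assert torch.allclose(gm(x), m(x))  # same outputs, identity node gone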
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py
CHANGED

@@ -46,7 +46,7 @@ def residual_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel
     """
-    if first_node.type == Conv2d:
+    if first_node.is_match_type(Conv2d):
         # Get nodes attributes
         kernel = first_node.get_weights_by_keys(kernel_str)
         (Cout, Cin, kH, kW) = kernel.shape
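Note: for context on what residual_collapsing_fn folds, an identity shortcut y = conv(x) + x can be absorbed into the convolution by adding 1 at the kernel center along the channel diagonal. A self-contained demonstration of that identity (illustrative only; MCT performs the equivalent edit on the node's kernel weights):

import torch

conv = torch.nn.Conv2d(4, 4, kernel_size=3, padding=1, bias=False)
x = torch.randn(1, 4, 8, 8)
y_ref = conv(x) + x                      # residual block with identity shortcut

folded = torch.nn.Conv2d(4, 4, kernel_size=3, padding=1, bias=False)
with torch.no_grad():
    k = conv.weight.clone()              # (Cout, Cin, kH, kW)
    for c in range(4):
        k[c, c, 1, 1] += 1.0             # +1 at the kernel center on the channel diagonal
    folded.weight.copy_(k)

assert torch.allclose(folded(x), y_ref, atol=1e-5)  # single conv, same output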
model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py
CHANGED

@@ -76,9 +76,9 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplementation):
         pruned_parameters = {}
         mask_bool = output_mask.astype(bool)
         node.weights = pruned_parameters
-        if node.type == torch.nn.BatchNorm2d:
+        if node.is_match_type(torch.nn.BatchNorm2d):
             node.framework_attr[NUM_FEATURES] = int(np.sum(input_mask))
-        elif node.type == torch.nn.PReLU:
+        elif node.is_match_type(torch.nn.PReLU):
             if node.framework_attr[NUM_PARAMETERS] > 1:
                 node.framework_attr[NUM_PARAMETERS] = int(np.sum(input_mask))
         else:

@@ -227,9 +227,9 @@ def _is_pytorch_node_pruning_section_edge(node: BaseNode) -> bool:
     """

     # Check if the node is a Conv2D or Conv2DTranspose layer with groups set to 1.
-    if node.type == torch.nn.Conv2d or node.type == torch.nn.ConvTranspose2d:
+    if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
         return node.framework_attr[GROUPS] == 1
-    return node.type == torch.nn.Linear
+    return node.is_match_type(torch.nn.Linear)


 def _prune_pytorch_edge_node(node: BaseNode,

@@ -268,18 +268,18 @@ def _prune_pytorch_edge_node(node: BaseNode,
     if not is_exit_node:
         # Update 'out_channels' or 'out_features' attributes for entry nodes
         # Conv2d,ConvTranspose2d / Linear layers.
-        if node.type == torch.nn.Conv2d or node.type == torch.nn.ConvTranspose2d:
+        if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
             node.framework_attr[OUT_CHANNELS] = int(np.sum(mask))
-        elif node.type == torch.nn.Linear:
+        elif node.is_match_type(torch.nn.Linear):
             node.framework_attr[OUT_FEATURES] = int(np.sum(mask))
         else:
             Logger.critical(f"{node.type} is currently not supported"
                             f"as an edge node in a pruning section")

     if is_exit_node:
-        if node.type == torch.nn.Conv2d or node.type == torch.nn.ConvTranspose2d:
+        if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
             node.framework_attr[IN_CHANNELS] = int(np.sum(mask))
-        elif node.type == torch.nn.Linear:
+        elif node.is_match_type(torch.nn.Linear):
             node.framework_attr[IN_FEATURES] = int(np.sum(mask))
         else:
             Logger.critical(f"{node.type} is currently not supported"
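Note: these hunks are part of a release-wide move from direct `node.type == X` comparisons to `node.is_match_type(X)` (the helper itself is introduced in base_node.py +26 -9 and functional_node.py +18 -1 in the file list, neither shown here). A hypothetical sketch of the idea, with illustrative names only:

class BaseNode:
    def __init__(self, node_type):
        self.type = node_type

    def is_match_type(self, _type) -> bool:
        # Plain layer nodes match on the node type itself.
        return _type == self.type


class FunctionalNode(BaseNode):
    def __init__(self, node_type, functional_op):
        super().__init__(node_type)
        self.functional_op = functional_op

    def is_match_type(self, _type) -> bool:
        # A functional node may also match on the op it wraps, so callers
        # no longer need to know how the op was traced.
        return super().is_match_type(_type) or _type == self.functional_op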
model_compression_toolkit/core/pytorch/pytorch_implementation.py
CHANGED

@@ -58,6 +58,7 @@ from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.const_holder_conv import \
     FunctionalConvSubstitution
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.relu_bound_to_power_of_2 import \
     ReLUBoundToPowerOfTwo
+from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.reshape_with_static_shapes import \
     ReshapeWithStaticShapes
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.residual_collapsing import \

@@ -238,7 +239,8 @@ class PytorchImplementation(FrameworkImplementation):
                 PermuteCallMethod(),
                 FunctionalConvSubstitution(fw_info),
                 FunctionalBatchNorm(),
-                FunctionalLayerNorm()]
+                FunctionalLayerNorm(),
+                RemoveIdentity()]

     def get_substitutions_pre_statistics_collection(self,
                                                     quant_config: QuantizationConfig

@@ -396,8 +398,8 @@ class PytorchImplementation(FrameworkImplementation):
         Returns: True if the node should be considered an interest point, False otherwise.
         """

-        if node.type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
-                         softmax, operator.add, add, cat, operator.concat]:
+        if any([node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
+                                                        softmax, operator.add, add, cat, operator.concat]]):
             return True
         return False

@@ -462,12 +464,12 @@ class PytorchImplementation(FrameworkImplementation):
         kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
         output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)

-        if node.type == Conv2d or node.type == ConvTranspose2d:
+        if node.is_match_type(Conv2d) or node.is_match_type(ConvTranspose2d):
             # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
             return np.prod([x for x in output_shape if x is not None]) * \
                    kernel_shape[input_channel_axis] * \
                    (kernel_shape[0] * kernel_shape[1])
-        elif node.type == Linear:
+        elif node.is_match_type(Linear):
             # IN * OUT
             return kernel_shape[0] * kernel_shape[1]
         else:

@@ -550,7 +552,6 @@ class PytorchImplementation(FrameworkImplementation):
         Returns:
             weight_quantizers: A dictionary between a weight's name to its quantizer.
             activation_quantizers: A list of activations quantization, one for each layer output.
-
         """

         return get_inferable_quantizers(node,
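Note: a quick worked example of the Conv2d MAC count used in the get_node_mac_operations hunk above, (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel), with assumed toy shapes:

# Assumed for illustration: Conv2d mapping 3 -> 16 channels with a 3x3 kernel
# and a 32x32 output feature map.
c_out, c_in, k_h, k_w = 16, 3, 3, 3
h_out = w_out = 32
macs = (c_out * h_out * w_out) * c_in * (k_h * k_w)
print(macs)  # 442368 multiply-accumulate operations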
model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py
CHANGED

@@ -62,7 +62,7 @@ def _get_mean_std_outputs(node: BaseNode,
     """
     mean_output, std_output = None, None

-    if node.type == BatchNorm2d:
+    if node.is_match_type(BatchNorm2d):
         mean_output = node.get_weights_by_keys(BETA)
         if node.get_weights_by_keys(GAMMA) is None:
             std_output = 1.0

@@ -72,7 +72,7 @@ def _get_mean_std_outputs(node: BaseNode,
         mean_output = 0.0
     else:
         next_node_list = graph.get_next_nodes(node)
-        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.type == BatchNorm2d]
+        bn_nodes = [bn_node for bn_node in next_node_list if bn_node.is_match_type(BatchNorm2d)]
         if len(bn_nodes) != 0:
             bn_node = bn_nodes[0]
             moving_variance = bn_node.get_weights_by_keys(MOVING_VARIANCE)
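Note: the prior-info logic above rests on a standard BatchNorm fact: BN outputs gamma * x_hat + beta with x_hat normalized, so the output mean is beta and the output standard deviation is |gamma| (hence std_output = 1.0 when gamma is absent). A quick numpy check:

import numpy as np

x = np.random.randn(100_000) * 3.0 + 7.0      # arbitrary input statistics
gamma, beta = 1.5, -0.25
x_hat = (x - x.mean()) / x.std()              # what BN normalization produces
y = gamma * x_hat + beta
print(round(y.mean(), 3), round(y.std(), 3))  # -0.25 1.5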
model_compression_toolkit/core/quantization_prep_runner.py
CHANGED

@@ -21,6 +21,7 @@ from tqdm import tqdm
 from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.model_collector import ModelCollector
 from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig

@@ -38,7 +39,8 @@ def quantization_preparation_runner(graph: Graph,
                                     core_config: CoreConfig,
                                     fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation,
-                                    tb_w: TensorboardWriter = None) -> Graph:
+                                    tb_w: TensorboardWriter = None,
+                                    hessian_info_service: HessianInfoService = None,) -> Graph:
     """
     Prepares a trained model for post-training quantization.
     First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).

@@ -55,6 +57,7 @@ def quantization_preparation_runner(graph: Graph,
         groups of layers by how they should be quantized, etc.).
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w: TensorboardWriter object for logging
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.

     Returns:
         Graph object that represents the model, contains thresholds, and ready for quantization.

@@ -86,7 +89,8 @@ def quantization_preparation_runner(graph: Graph,
     ######################################
     # Calculate quantization params
     ######################################
-    calculate_quantization_params(graph)
+
+    calculate_quantization_params(graph, hessian_info_service=hessian_info_service)

     if tb_w is not None:
         tb_w.add_graph(graph, 'thresholds_selection')
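Note: the point of threading one HessianInfoService instance from core_runner (next file) through quantization_preparation_runner into calculate_quantization_params is reuse: Hessian-based scores are expensive to approximate, and a shared service can cache them across threshold selection and later GPTQ. A minimal caching sketch (not MCT's API; names are illustrative):

class TinyHessianService:
    """Caches expensive per-request Hessian score approximations."""

    def __init__(self, compute_fn):
        self._compute = compute_fn    # e.g. a stochastic Hutchinson-style estimator
        self._cache = {}

    def fetch(self, request_key):
        # Identical requests (same node, same granularity, ...) hit the cache.
        if request_key not in self._cache:
            self._cache[request_key] = self._compute(request_key)
        return self._cache[request_key]

calls = []
svc = TinyHessianService(lambda key: calls.append(key) or 0.5)
svc.fetch(('conv1', 'per-tensor'))
svc.fetch(('conv1', 'per-tensor'))
print(len(calls))  # 1 -- the second fetch was served from the cache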
model_compression_toolkit/core/runner.py
CHANGED

@@ -48,6 +48,7 @@ def core_runner(in_model: Any,
                 fw_impl: FrameworkImplementation,
                 tpc: TargetPlatformCapabilities,
                 target_resource_utilization: ResourceUtilization = None,
+                running_gptq: bool = False,
                 tb_w: TensorboardWriter = None):
     """
     Quantize a trained model using post-training quantization.

@@ -97,7 +98,8 @@ def core_runner(in_model: Any,
                 fw_impl,
                 tpc,
                 tb_w,
-                mixed_precision_enable=core_config.mixed_precision_enable)
+                mixed_precision_enable=core_config.mixed_precision_enable,
+                running_gptq=running_gptq)

     hessian_info_service = HessianInfoService(graph=graph,
                                               representative_dataset=representative_data_gen,

@@ -108,7 +110,8 @@ def core_runner(in_model: Any,
                                               core_config=core_config,
                                               fw_info=fw_info,
                                               fw_impl=fw_impl,
-                                              tb_w=tb_w)
+                                              tb_w=tb_w,
+                                              hessian_info_service=hessian_info_service)

     ######################################
     # Finalize bit widths
model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py
CHANGED

@@ -42,8 +42,12 @@ if FOUND_TF:
         """
         weights_quantizers, _ = fw_impl.get_inferable_quantizers(node)
         if len(weights_quantizers) > 0:
+            # for positional weights we need to extract the weight's value.
+            weights_values = {attr: node.get_weights_by_keys(attr)
+                              for attr in weights_quantizers if isinstance(attr, int)}
             return KerasQuantizationWrapper(layer,
-                                            weights_quantizers)
+                                            weights_quantizers,
+                                            weights_values)
         return layer
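Note: the comprehension above separates named weight attributes (string keys such as 'kernel') from positional weights (integer keys, i.e. constant inputs addressed by input index) and passes only the latter's values to the wrapper. A toy version of the selection (hypothetical values, for illustration):

weights_quantizers = {'kernel': 'kernel-quantizer', 1: 'const-input-quantizer'}
node_weights = {'kernel': 'kernel-tensor', 1: 'const-tensor'}

# Only integer keys denote positional constant inputs whose values the
# quantization wrapper must receive explicitly.
weights_values = {attr: node_weights[attr]
                  for attr in weights_quantizers if isinstance(attr, int)}
print(weights_values)  # {1: 'const-tensor'}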
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py
CHANGED

@@ -29,7 +29,7 @@ if FOUND_TORCH:

     def fully_quantized_wrapper(node: common.BaseNode,
                                 module: torch.nn.Module,
-                                fw_impl) -> Union[torch.nn.Module,PytorchQuantizationWrapper]:
+                                fw_impl) -> Union[torch.nn.Module, PytorchQuantizationWrapper]:
         """
         A function which takes a computational graph node and a pytorch module and
         perform the quantization wrapping

@@ -37,20 +37,26 @@ if FOUND_TORCH:
         Args:
             node: A node of mct graph.
             module: A Pytorch module
+            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         Returns: Wrapped layer

         """
         weight_quantizers, _ = fw_impl.get_inferable_quantizers(node)
         if len(weight_quantizers) > 0:
-            return PytorchQuantizationWrapper(module, weight_quantizers)
+            # for positional weights we need to extract the weight's value.
+            weights_values = {attr: fw_impl.to_tensor(node.get_weights_by_keys(attr))
+                              for attr in weight_quantizers if isinstance(attr, int)}
+            return PytorchQuantizationWrapper(module, weight_quantizers, weights_values)
         return module

+
     def get_activation_quantizer_holder(node: BaseNode, fw_impl) -> Callable:
         """
         Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization of a node.
         If the layer is not supposed to be wrapped with an activation quantizer - return None.
         Args:
             node: Node to attach a PytorchActivationQuantizationHolder to its output.
+            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         Returns:
             A PytorchActivationQuantizationHolder module for the node's activation quantization.
         """

@@ -64,6 +70,7 @@ if FOUND_TORCH:
             f'PytorchActivationQuantizationHolder supports a single quantizer but {len(activation_quantizers)} quantizers '
             f'were found for node {node}')

+
     def get_exportable_pytorch_model(graph: Graph):
         """
         Convert graph to fully quantized PyTorch model.
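Note: unlike the Keras path above, each positional weight here goes through fw_impl.to_tensor(...), presumably because the graph stores node weights as numpy arrays while PytorchQuantizationWrapper needs torch tensors. A rough stand-in for that conversion (sketch only; the real to_tensor also handles device placement):

import numpy as np
import torch

def to_tensor_sketch(a):
    # Sketch: turn a numpy weight into a float32 torch tensor.
    return torch.from_numpy(np.asarray(a, dtype=np.float32))

print(to_tensor_sketch(np.ones((2, 2))).dtype)  # torch.float32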
model_compression_toolkit/gptq/pytorch/quantization_facade.py
CHANGED

@@ -180,7 +180,9 @@
                                                     fw_impl=fw_impl,
                                                     tpc=target_platform_capabilities,
                                                     target_resource_utilization=target_resource_utilization,
-                                                    tb_w=tb_w)
+                                                    tb_w=tb_w,
+                                                    running_gptq=True)
+
     float_graph = copy.deepcopy(graph)

     # ---------------------- #
model_compression_toolkit/gptq/runner.py
CHANGED

@@ -111,6 +111,7 @@ def gptq_runner(tg: Graph,
     #############################################
     # Gradient Based Post Training Quantization
     #############################################
+    Logger.info("Running GPTQ optimization.")
     tg_gptq = _apply_gptq(gptq_config,
                           gptq_representative_data_gen,
                           tb_w,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py
CHANGED

@@ -13,12 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 from model_compression_toolkit.constants import FOUND_TF, FOUND_TORCH
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model, generate_tp_model, \
     get_op_quantization_configs
 if FOUND_TF:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import get_keras_tpc as get_keras_tpc_latest
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import generate_keras_tpc
 if FOUND_TORCH:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import get_pytorch_tpc as \
         get_pytorch_tpc_latest
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import generate_pytorch_tpc
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
CHANGED

@@ -56,7 +56,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:

     # We define a default quantization config for all non-specified weights attributes.
     default_weight_attr_config = AttributeQuantizationConfig(
-        weights_quantization_method=tp.QuantizationMethod.
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
         enable_weights_quantization=False,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
CHANGED

@@ -32,7 +32,7 @@ def get_tp_model() -> TargetPlatformModel:
     NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
     (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
     'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
-    This version enables metadata by default
+    This version enables metadata by default.

     Returns: A TargetPlatformModel object.

@@ -44,7 +44,8 @@ def get_tp_model() -> TargetPlatformModel:
                              name='imx500_tp_model')


-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+def get_op_quantization_configs() -> \
+        Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as

@@ -151,6 +152,19 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # this configuration will be used for the operation quantization:
     default_configuration_options = tp.QuantizationConfigOptions([default_config])

+    # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+    # Constant configuration is similar to the default eight bit configuration except for PoT
+    # quantization method for the constant.
+    # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+    # define the desired quantization properties for them.
+    const_config = default_config.clone_and_edit(
+        default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+            enable_weights_quantization=True))
+    if not (const_config.default_weight_attr_config.weights_quantization_method == tp.QuantizationMethod.POWER_OF_TWO and
+            const_config.default_weight_attr_config.weights_per_channel_threshold is False):
+        mct.logger.Logger.error('Constant quantization config should be per-tensor PoT.')
+    const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):

@@ -184,10 +198,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
     any_relu = tp.OperatorsSet("AnyReLU")
-    add = tp.OperatorsSet("Add")
-    sub = tp.OperatorsSet("Sub")
-    mul = tp.OperatorsSet("Mul")
-    div = tp.OperatorsSet("Div")
+    add = tp.OperatorsSet("Add", const_configuration_options)
+    sub = tp.OperatorsSet("Sub", const_configuration_options)
+    mul = tp.OperatorsSet("Mul", const_configuration_options)
+    div = tp.OperatorsSet("Div", const_configuration_options)
     prelu = tp.OperatorsSet("PReLU")
     swish = tp.OperatorsSet("Swish")
     sigmoid = tp.OperatorsSet("Sigmoid")
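Note: the clone_and_edit calls above follow a copy-with-overrides pattern: duplicate the default config and flip only the fields that differ (here, turning weight quantization on for constants while keeping the per-tensor power-of-two method). A standalone sketch of the same pattern with a stand-in dataclass instead of MCT's OpQuantizationConfig:

from dataclasses import dataclass, replace

@dataclass(frozen=True)
class AttrCfg:                      # stand-in for AttributeQuantizationConfig
    method: str = 'POWER_OF_TWO'
    per_channel: bool = False
    enabled: bool = False

@dataclass(frozen=True)
class OpCfg:                        # stand-in for OpQuantizationConfig
    default_weight_attr_config: AttrCfg = AttrCfg()

default_config = OpCfg()
const_config = replace(
    default_config,
    default_weight_attr_config=replace(default_config.default_weight_attr_config,
                                       enabled=True))

# Mirrors the guard above: constants must stay per-tensor power-of-two.
assert const_config.default_weight_attr_config.method == 'POWER_OF_TWO'
assert const_config.default_weight_attr_config.per_channel is False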
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py
CHANGED

@@ -122,7 +122,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
     tp.OperationsSetToLayers("Add", [tf.add, Add])
     tp.OperationsSetToLayers("Sub", [tf.subtract, Subtract])
     tp.OperationsSetToLayers("Mul", [tf.math.multiply, Multiply])
-    tp.OperationsSetToLayers("Div", [tf.math.divide])
+    tp.OperationsSetToLayers("Div", [tf.math.divide, tf.math.truediv])
     tp.OperationsSetToLayers("PReLU", [PReLU])
     tp.OperationsSetToLayers("Swish", [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
     tp.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])
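Note: the reason tf.math.truediv is added next to tf.math.divide is that the Python / operator on TF tensors dispatches to truediv, so a model that divides with / traces to a truediv op the previous "Div" mapping did not cover. A quick check:

import tensorflow as tf

a = tf.constant([4.0])
b = tf.constant([2.0])
# Both forms compute the same division; `/` resolves to truediv under the hood.
print((a / b).numpy(), tf.math.truediv(a, b).numpy())  # [2.] [2.]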
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
CHANGED

@@ -33,7 +33,7 @@ def get_tp_model() -> TargetPlatformModel:
     NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
     (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
     'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
-    This version enables metadata by default
+    This version enables metadata by default.

     Returns: A TargetPlatformModel object.

@@ -45,7 +45,8 @@ def get_tp_model() -> TargetPlatformModel:
                              name='imx500_lut_tp_model')


-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+def get_op_quantization_configs() -> \
+        Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
     """
     Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
     In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as

@@ -57,13 +58,13 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:

     # We define a default quantization config for all non-specified weights attributes.
     default_weight_attr_config = AttributeQuantizationConfig(
-        weights_quantization_method=tp.QuantizationMethod.
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         weights_n_bits=8,
         weights_per_channel_threshold=False,
         enable_weights_quantization=False,
         lut_values_bitwidth=None)

-    #
+    # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
     kernel_base_config = AttributeQuantizationConfig(
         weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
         weights_n_bits=8,

@@ -150,6 +151,19 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # this configuration will be used for the operation quantization:
     default_configuration_options = tp.QuantizationConfigOptions([default_config])

+    # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+    # Constant configuration is similar to the default eight bit configuration except for PoT
+    # quantization method for the constant.
+    # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+    # define the desired quantization properties for them.
+    const_config = default_config.clone_and_edit(
+        default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+            enable_weights_quantization=True))
+    if not (const_config.default_weight_attr_config.weights_quantization_method == tp.QuantizationMethod.POWER_OF_TWO and
+            const_config.default_weight_attr_config.weights_per_channel_threshold is False):
+        mct.logger.Logger.error('Constant quantization config should be per-tensor PoT.')
+    const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
     # Create a TargetPlatformModel and set its default quantization config.
     # This default configuration will be used for all operations
     # unless specified otherwise (see OperatorsSet, for example):

@@ -181,10 +195,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
     # Define operations sets without quantization configuration
     # options (useful for creating fusing patterns, for example):
     any_relu = tp.OperatorsSet("AnyReLU")
-    add = tp.OperatorsSet("Add")
-    sub = tp.OperatorsSet("Sub")
-    mul = tp.OperatorsSet("Mul")
-    div = tp.OperatorsSet("Div")
+    add = tp.OperatorsSet("Add", const_configuration_options)
+    sub = tp.OperatorsSet("Sub", const_configuration_options)
+    mul = tp.OperatorsSet("Mul", const_configuration_options)
+    div = tp.OperatorsSet("Div", const_configuration_options)
     prelu = tp.OperatorsSet("PReLU")
     swish = tp.OperatorsSet("Swish")
     sigmoid = tp.OperatorsSet("Sigmoid")
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py
CHANGED

@@ -122,7 +122,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
     tp.OperationsSetToLayers("Add", [tf.add, Add])
     tp.OperationsSetToLayers("Sub", [tf.subtract, Subtract])
     tp.OperationsSetToLayers("Mul", [tf.math.multiply, Multiply])
-    tp.OperationsSetToLayers("Div", [tf.math.divide])
+    tp.OperationsSetToLayers("Div", [tf.math.divide, tf.math.truediv])
     tp.OperationsSetToLayers("PReLU", [PReLU])
     tp.OperationsSetToLayers("Swish", [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
     tp.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])
{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/LICENSE.md
RENAMED
File without changes

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/WHEEL
RENAMED
File without changes

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/top_level.txt
RENAMED
File without changes