mct-nightly 2.1.0.20240801.515__py3-none-any.whl → 2.1.0.20240802.429__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240801.515.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/METADATA +1 -1
- {mct_nightly-2.1.0.20240801.515.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/RECORD +33 -29
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +2 -1
- model_compression_toolkit/core/common/graph/base_node.py +50 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +6 -6
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +1 -0
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +6 -4
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +19 -12
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -14
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +14 -9
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +25 -17
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +10 -6
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +12 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +7 -5
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +25 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +5 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +235 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +132 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +112 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +2 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +2 -0
- {mct_nightly-2.1.0.20240801.515.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240801.515.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240801.515.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py
CHANGED
@@ -16,7 +16,7 @@ import numpy as np
 from typing import Union, Tuple, Dict
 
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES, SIGNED
 from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.error_functions import \
     get_threshold_selection_tensor_error_function, get_threshold_selection_histogram_error_function, _kl_error_histogram
@@ -106,7 +106,8 @@ def symmetric_selection_histogram(bins: np.ndarray,
                                   constrained: bool = True,
                                   n_iter: int = 20,
                                   min_threshold: float = MIN_THRESHOLD,
-                                  quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> Dict:
+                                  quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
+                                  is_signed: bool = None) -> Dict:
     """
     Compute the optimal threshold based on the provided QuantizationErrorMethod to quantize a histogram.
     Different search is applied, depends on the value of the selected QuantizationErrorMethod.
@@ -122,6 +123,7 @@ def symmetric_selection_histogram(bins: np.ndarray,
         n_iter: Number of iteration ot search for the threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (used only for kl threshold selection).
         quant_error_method: an error function to optimize the parameters' selection accordingly.
+        is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
 
     Returns:
         Optimal threshold to quantize the histogram a symmetric manner.
@@ -129,23 +131,27 @@ def symmetric_selection_histogram(bins: np.ndarray,
     tensor_max = np.max(np.abs(bins)[1:][counts > 0])
     if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
         threshold = get_init_threshold(min_threshold, tensor_max)
+        # Resolve is_signed in case it is None.
+        signed = (bins<0).any() if is_signed is None else is_signed
     elif quant_error_method == qc.QuantizationErrorMethod.KL:
         # search for KL error is separated because the error method signature is different from the other error methods.
-        threshold = kl_qparams_symmetric_selection_histogram_search(_kl_error_histogram,
-                                                                    tensor_max,
-                                                                    bins,
-                                                                    counts,
-                                                                    n_bits,
-                                                                    min_threshold=min_threshold)
+        threshold, signed = kl_qparams_symmetric_selection_histogram_search(_kl_error_histogram,
+                                                                            tensor_max,
+                                                                            bins,
+                                                                            counts,
+                                                                            n_bits,
+                                                                            min_threshold=min_threshold,
+                                                                            is_signed=is_signed)
     else:
         error_function = get_threshold_selection_histogram_error_function(QuantizationMethod.SYMMETRIC, quant_error_method, p)
-        threshold = qparams_symmetric_selection_histogram_search(error_function,
-                                                                 tensor_max,
-                                                                 bins,
-                                                                 counts,
-                                                                 n_bits,
-                                                                 min_threshold=min_threshold)
-    return {THRESHOLD: threshold}
+        threshold, signed = qparams_symmetric_selection_histogram_search(error_function,
+                                                                         tensor_max,
+                                                                         bins,
+                                                                         counts,
+                                                                         n_bits,
+                                                                         min_threshold=min_threshold,
+                                                                         is_signed=is_signed)
+    return {THRESHOLD: threshold, SIGNED: signed}
 
 
 def symmetric_no_clipping_selection_min_max(bins: np.ndarray,
@@ -158,7 +164,8 @@ def symmetric_no_clipping_selection_min_max(bins: np.ndarray,
                                             n_iter: int = 20,
                                             min_threshold: float = MIN_THRESHOLD,
                                             quant_error_method: qc.QuantizationErrorMethod =
-                                            qc.QuantizationErrorMethod.NOCLIPPING) -> Dict:
+                                            qc.QuantizationErrorMethod.NOCLIPPING,
+                                            is_signed: bool = None) -> Dict:
     """
     Gets a threshold between min and max numbers.
     If computed threshold is less than min_threshold, min_threshold is returned.
@@ -175,7 +182,8 @@ def symmetric_no_clipping_selection_min_max(bins: np.ndarray,
                                          constrained,
                                          n_iter,
                                          min_threshold=min_threshold,
-                                         quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING)
+                                         quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING,
+                                         is_signed=is_signed)
 
 
 def get_init_threshold(min_threshold: float, tensor_max: np.ndarray, per_channel: bool = False) -> np.ndarray:
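The recurring change in these quantization-parameter files is that histogram-based selection now also reports activation signedness. A minimal standalone sketch of the resolution rule mirrored from the hunks above (the helper name `resolve_signed` is ours, not MCT's):

import numpy as np

def resolve_signed(bins: np.ndarray, is_signed: bool = None) -> bool:
    # Mirrors the diff: when the config does not force signedness
    # (is_signed is None), any negative histogram bin edge makes the
    # activation quantizer signed.
    return bool((bins < 0).any()) if is_signed is None else bool(is_signed)

relu_bins = np.array([0.0, 0.5, 1.0, 1.5])
tanh_bins = np.array([-1.0, 0.0, 1.0])
assert resolve_signed(relu_bins) is False                   # inferred from data
assert resolve_signed(tanh_bins) is True
assert resolve_signed(tanh_bins, is_signed=False) is False  # forced by config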
model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py
CHANGED
@@ -16,7 +16,7 @@ import numpy as np
 from typing import Union, Tuple, Dict
 
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES, SIGNED
 from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
     qparams_uniform_selection_tensor_search, qparams_uniform_selection_histogram_search
@@ -114,7 +114,8 @@ def uniform_selection_histogram(bins: np.ndarray,
                                 constrained: bool = True,
                                 n_iter: int = 20,
                                 min_threshold: float = MIN_THRESHOLD,
-                                quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> Dict:
+                                quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
+                                is_signed: bool = None) -> Dict:
     """
     Compute the optimal quantization range based on the provided QuantizationErrorMethod
     to uniformly quantize the histogram.
@@ -131,6 +132,7 @@ def uniform_selection_histogram(bins: np.ndarray,
         n_iter: Number of iteration ot search for the threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the range parameters selection accordingly.
+        is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
 
     Returns:
         Optimal quantization range to quantize the histogram uniformly.
@@ -139,6 +141,7 @@ def uniform_selection_histogram(bins: np.ndarray,
     tensor_max = np.max(bins[1:][counts > 0])
     tensor_min_max = np.array([tensor_min, tensor_max])
 
+    signed = tensor_min < 0 if is_signed is None else is_signed
     if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
         mm = tensor_min_max
     else:
@@ -150,7 +153,7 @@ def uniform_selection_histogram(bins: np.ndarray,
                                                    n_bits)
 
     return {RANGE_MIN: mm[0],
-            RANGE_MAX: mm[1]}
+            RANGE_MAX: mm[1], SIGNED: signed}
 
 
 def uniform_no_clipping_selection_min_max(bins: np.ndarray,
@@ -163,7 +166,8 @@ def uniform_no_clipping_selection_min_max(bins: np.ndarray,
                                           n_iter: int = 20,
                                           min_threshold: float = MIN_THRESHOLD,
                                           quant_error_method: qc.QuantizationErrorMethod =
-                                          qc.QuantizationErrorMethod.NOCLIPPING) -> Dict:
+                                          qc.QuantizationErrorMethod.NOCLIPPING,
+                                          is_signed: bool = None) -> Dict:
     """
     Gets a quantization rage between min and max numbers.
 
@@ -179,5 +183,5 @@ def uniform_no_clipping_selection_min_max(bins: np.ndarray,
                                       constrained,
                                       n_iter,
                                       min_threshold=min_threshold,
-                                      quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING
-                                      )
+                                      quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING,
+                                      is_signed=is_signed)
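For the uniform (min/max) case, the same idea applies to the returned range dict: signedness comes from the range minimum unless forced. A sketch under the same caveats (the key strings here are illustrative; the real values live in model_compression_toolkit/constants.py):

RANGE_MIN, RANGE_MAX, SIGNED = 'range_min', 'range_max', 'is_signed'  # illustrative keys

def uniform_range_with_signedness(tensor_min: float, tensor_max: float,
                                  is_signed: bool = None) -> dict:
    # Mirrors uniform_selection_histogram above: the returned dict now
    # carries SIGNED alongside the quantization range.
    signed = tensor_min < 0 if is_signed is None else is_signed
    return {RANGE_MIN: tensor_min, RANGE_MAX: tensor_max, SIGNED: signed}

assert uniform_range_with_signedness(-0.3, 1.2)[SIGNED] is True
assert uniform_range_with_signedness(0.0, 6.0)[SIGNED] is False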
model_compression_toolkit/core/common/quantization/set_node_quantization_config.py
CHANGED
@@ -64,6 +64,7 @@ def set_quantization_configuration_to_graph(graph: Graph,
 
     for n in graph.nodes:
         set_quantization_configs_to_node(node=n,
+                                         graph=graph,
                                          quant_config=quant_config,
                                          fw_info=graph.fw_info,
                                          tpc=graph.tpc,
@@ -72,6 +73,7 @@ def set_quantization_configuration_to_graph(graph: Graph,
 
 
 def set_quantization_configs_to_node(node: BaseNode,
+                                     graph: Graph,
                                      quant_config: QuantizationConfig,
                                      fw_info: FrameworkInfo,
                                      tpc: TargetPlatformCapabilities,
@@ -81,19 +83,22 @@ def set_quantization_configs_to_node(node: BaseNode,
 
     Args:
         node: Node to set its quantization configurations.
+        graph: Model's internal representation graph.
        quant_config: Quantization configuration to generate the node's configurations from.
         fw_info: Information needed for quantization about the specific framework.
         tpc: TargetPlatformCapabilities to get default OpQuantizationConfig.
         mixed_precision_enable: is mixed precision enabled.
     """
     node_qc_options = node.get_qco(tpc)
+    base_config, node_qc_options_list = node.filter_node_qco_by_graph(tpc, graph.get_next_nodes(node), node_qc_options)
 
     # Create QC candidates for weights and activation combined
     weight_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
     node.candidates_quantization_cfg = _create_node_candidates_qc(quant_config,
                                                                   fw_info,
                                                                   weight_channel_axis,
-                                                                  node_qc_options,
+                                                                  node_qc_options_list,
+                                                                  base_config,
                                                                   node,
                                                                   mixed_precision_enable=mixed_precision_enable)
 
@@ -186,7 +191,8 @@ def _create_node_single_candidate_qc(qc: QuantizationConfig,
 def _create_node_candidates_qc(qc: QuantizationConfig,
                                fw_info: FrameworkInfo,
                                weight_channel_axis: Tuple[int, int],
-                               node_qc_options: QuantizationConfigOptions,
+                               node_qc_options_list: List[OpQuantizationConfig],
+                               base_config: OpQuantizationConfig,
                                node: BaseNode,
                                mixed_precision_enable: bool = False) -> List[CandidateNodeQuantizationConfig]:
     """
@@ -196,7 +202,8 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
         qc: Quantization configuration the quantization process should follow.
         fw_info: Framework information (e.g., which layers should have their kernels' quantized).
         weight_channel_axis: (Output, Input) channel index of the node's kernel.
-        node_qc_options: QuantizationConfigOptions of the node.
+        node_qc_options_list: List of quantization configs of node.
+        base_config: Base quantization config for node.
         node: A node to set quantization configuration candidates to.
         mixed_precision_enable: is mixed precision enabled
 
@@ -208,7 +215,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
     node_attrs_list = node.get_node_weights_attributes()
 
     if mixed_precision_enable:
-        for op_cfg in node_qc_options.quantization_config_list:
+        for op_cfg in node_qc_options_list:
             candidate_qc = copy.deepcopy(qc)
             candidates.append(_create_node_single_candidate_qc(candidate_qc,
                                                                fw_info,
@@ -220,7 +227,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
         candidates.append(_create_node_single_candidate_qc(qc,
                                                            fw_info,
                                                            weight_channel_axis,
-                                                           node_qc_options.base_config,
+                                                           base_config,
                                                            node_attrs_list))
 
     return candidates
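The new graph parameter exists so a node's quantization candidates can be filtered against its consumers: filter_node_qco_by_graph receives the node's successors and keeps only configs whose output bit-width each successor accepts as input (via the supported_input_activation_n_bits field added below). A toy model of that filtering, with all names except the filter semantics hypothetical:

def filter_options_by_successors(options, successors):
    # Toy version of the successor-aware filtering: keep only candidate
    # configs whose output bit-width every successor supports as input.
    return [cfg for cfg in options
            if all(cfg['activation_n_bits'] in s['supported_input_activation_n_bits']
                   for s in successors)]

options = [{'activation_n_bits': 8}, {'activation_n_bits': 16}]
successors = [{'supported_input_activation_n_bits': (8,)}]
assert filter_options_by_successors(options, successors) == [{'activation_n_bits': 8}]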
model_compression_toolkit/core/common/substitutions/shift_negative_activation.py
CHANGED
@@ -349,9 +349,15 @@ def shift_negative_function(graph: Graph,
                                              add_node.output_shape,
                                              pad_top, pad_btm, pad_left, pad_right)
 
+    # Insert a pad node between the add node to the op2d, and create statistics for the pad node
+    insert_node_before_node(graph,
+                            node_to_insert=pad_node,
+                            last_node=op2d_node)
+
     # Set quantization configuration to node, even though we do not quantize it:
     set_quantization_configs_to_node(fw_info=fw_info,
                                      node=pad_node,
+                                     graph=graph,
                                      quant_config=core_config.quantization_config,
                                      tpc=graph.tpc,
                                      mixed_precision_enable=core_config.mixed_precision_enable)
@@ -361,11 +367,6 @@ def shift_negative_function(graph: Graph,
     for attr in pad_node.get_node_weights_attributes():
         candidate_qc.weights_quantization_cfg.get_attr_config(attr).enable_weights_quantization = False
 
-    # Insert a pad node between the add node to the op2d, and create statistics for the pad node
-    insert_node_before_node(graph,
-                            node_to_insert=pad_node,
-                            last_node=op2d_node)
-
     graph.set_out_stats_collector_to_node(pad_node,
                                           add_node_stats_collector)  # We ignore the padding effect on statistics
 
@@ -373,6 +374,7 @@ def shift_negative_function(graph: Graph,
 
     set_quantization_configs_to_node(fw_info=fw_info,
                                      node=add_node,
+                                     graph=graph,
                                      quant_config=core_config.quantization_config,
                                      tpc=graph.tpc,
                                      mixed_precision_enable=core_config.mixed_precision_enable)
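The only functional change here is ordering: insert_node_before_node now runs before set_quantization_configs_to_node. The ordering matters because config-setting now calls graph.get_next_nodes(pad_node), which is empty until the pad node is actually wired into the graph. A toy illustration (not MCT code):

class ToyGraph:
    def __init__(self):
        self.edges = {}

    def insert_before(self, node_to_insert, last_node):
        # Wire node_to_insert in front of last_node.
        self.edges.setdefault(node_to_insert, []).append(last_node)

    def get_next_nodes(self, node):
        return self.edges.get(node, [])

g = ToyGraph()
assert g.get_next_nodes('pad') == []        # before insertion: no successors to filter by
g.insert_before('pad', 'op2d')              # step 1: insert the pad node
assert g.get_next_nodes('pad') == ['op2d']  # step 2: config-setting can now see op2d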
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py
CHANGED
@@ -14,7 +14,7 @@
 # ==============================================================================
 
 import copy
-from typing import List, Dict, Union, Any
+from typing import List, Dict, Union, Any, Tuple
 
 from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
@@ -114,11 +114,13 @@ class OpQuantizationConfig:
                  attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig],
                  activation_quantization_method: QuantizationMethod,
                  activation_n_bits: int,
+                 supported_input_activation_n_bits: Union[int, Tuple[int]],
                  enable_activation_quantization: bool,
                  quantization_preserving: bool,
                  fixed_scale: float,
                  fixed_zero_point: int,
-                 simd_size: int
+                 simd_size: int,
+                 is_signed: bool = None
                  ):
        """
 
@@ -127,10 +129,12 @@ class OpQuantizationConfig:
            attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]): A mapping between an op attribute name and its quantization configuration.
            activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization.
            activation_n_bits (int): Number of bits to quantize the activations.
+           supported_input_activation_n_bits (int or Tuple[int]): Number of bits that operator accepts as input.
            enable_activation_quantization (bool): Whether to quantize the model activations or not.
            quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output.
            fixed_scale (float): Scale to use for an operator quantization parameters.
            fixed_zero_point (int): Zero-point to use for an operator quantization parameters.
+           is_signed (bool): Force activation quantization signedness (None means don't force).
            simd_size (int): Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
 
        """
@@ -140,10 +144,17 @@ class OpQuantizationConfig:
 
         self.activation_quantization_method = activation_quantization_method
         self.activation_n_bits = activation_n_bits
+        if isinstance(supported_input_activation_n_bits, tuple):
+            self.supported_input_activation_n_bits = supported_input_activation_n_bits
+        elif isinstance(supported_input_activation_n_bits, int):
+            self.supported_input_activation_n_bits = (supported_input_activation_n_bits,)
+        else:
+            Logger.critical(f"Supported_input_activation_n_bits only accepts int or tuple of ints, but got {type(supported_input_activation_n_bits)}")  # pragma: no cover
         self.enable_activation_quantization = enable_activation_quantization
         self.quantization_preserving = quantization_preserving
         self.fixed_scale = fixed_scale
         self.fixed_zero_point = fixed_zero_point
+        self.is_signed = is_signed
         self.simd_size = simd_size
 
     def get_info(self):
@@ -193,9 +204,21 @@ class OpQuantizationConfig:
                self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \
                self.activation_quantization_method == other.activation_quantization_method and \
                self.activation_n_bits == other.activation_n_bits and \
+               self.supported_input_activation_n_bits == other.supported_input_activation_n_bits and \
                self.enable_activation_quantization == other.enable_activation_quantization and \
+               self.is_signed == other.is_signed and \
                self.simd_size == other.simd_size
 
+    @property
+    def max_input_activation_n_bits(self) -> int:
+        """
+        Get maximum supported input bit-width.
+
+        Returns: Maximum supported input bit-width.
+
+        """
+        return max(self.supported_input_activation_n_bits)
+
 
 class QuantizationConfigOptions:
     """
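A runnable distillation of the constructor change above: scalar bit-widths are normalized to one-element tuples, and the new property reports the maximum (the class name is simplified here; the real class takes many other arguments):

class OpConfigSketch:
    def __init__(self, supported_input_activation_n_bits):
        # Same normalization as OpQuantizationConfig.__init__ in the hunk above.
        if isinstance(supported_input_activation_n_bits, tuple):
            self.supported_input_activation_n_bits = supported_input_activation_n_bits
        elif isinstance(supported_input_activation_n_bits, int):
            self.supported_input_activation_n_bits = (supported_input_activation_n_bits,)
        else:
            raise TypeError(f"Expected int or tuple of ints, got {type(supported_input_activation_n_bits)}")

    @property
    def max_input_activation_n_bits(self) -> int:
        # Maximum input bit-width the operator accepts.
        return max(self.supported_input_activation_n_bits)

assert OpConfigSketch(8).supported_input_activation_n_bits == (8,)
assert OpConfigSketch((8, 16)).max_input_activation_n_bits == 16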
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py
CHANGED
@@ -29,6 +29,7 @@ if FOUND_TF:
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2_lut.tpc_keras import get_keras_tpc as get_keras_tpc_v2_lut
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3.tpc_keras import get_keras_tpc as get_keras_tpc_v3
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3_lut.tpc_keras import get_keras_tpc as get_keras_tpc_v3_lut
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tpc_keras import get_keras_tpc as get_keras_tpc_v4
 
     # Keras: TPC versioning
     keras_tpc_models_dict = {'v1': get_keras_tpc_v1,
@@ -38,6 +39,7 @@ if FOUND_TF:
                              'v2_lut': get_keras_tpc_v2_lut,
                              'v3': get_keras_tpc_v3,
                              'v3_lut': get_keras_tpc_v3_lut,
+                             'v4': get_keras_tpc_v4,
                              LATEST: get_keras_tpc_latest}
 
     ###############################
@@ -60,6 +62,8 @@ if FOUND_TORCH:
         get_pytorch_tpc as get_pytorch_tpc_v3
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3_lut.tpc_pytorch import \
         get_pytorch_tpc as get_pytorch_tpc_v3_lut
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v4.tpc_pytorch import \
+        get_pytorch_tpc as get_pytorch_tpc_v4
 
     # Pytorch: TPC versioning
     pytorch_tpc_models_dict = {'v1': get_pytorch_tpc_v1,
@@ -69,6 +73,7 @@ if FOUND_TORCH:
                                'v2_lut': get_pytorch_tpc_v2_lut,
                                'v3': get_pytorch_tpc_v3,
                                'v3_lut': get_pytorch_tpc_v3_lut,
+                               'v4': get_pytorch_tpc_v4,
                                LATEST: get_pytorch_tpc_latest}
 
 tpc_dict = {TENSORFLOW: keras_tpc_models_dict,
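With 'v4' registered in both framework dicts, it should be selectable through MCT's standard TPC lookup. Assuming the usual entry point, something like:

import model_compression_toolkit as mct

# Fetch the newly registered imx500 v4 TPC for a Keras workflow
# (the 'pytorch' framework name works analogously).
tpc = mct.get_target_platform_capabilities('tensorflow', 'imx500',
                                           target_platform_version='v4')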
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py
CHANGED
@@ -93,6 +93,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -105,6 +106,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
CHANGED
@@ -89,6 +89,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -101,6 +102,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py
CHANGED
@@ -89,6 +89,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -101,6 +102,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza
         default_weight_attr_config=default_weight_attr_config,
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
CHANGED
@@ -95,6 +95,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -107,6 +108,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
CHANGED
@@ -91,6 +91,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -103,6 +104,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py
CHANGED
@@ -95,6 +95,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -107,6 +108,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py
CHANGED
@@ -91,6 +91,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
@@ -103,6 +104,7 @@ def get_op_quantization_configs() -> \
         attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
         activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
         activation_n_bits=8,
+        supported_input_activation_n_bits=8,
         enable_activation_quantization=True,
         quantization_preserving=False,
         fixed_scale=None,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/__init__.py
ADDED
@@ -0,0 +1,16 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+__version__ = 'v4'