mct-nightly 2.1.0.20240608.434-py3-none-any.whl → 2.1.0.20240610.442-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/METADATA +1 -1
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/RECORD +26 -18
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/base_node.py +1 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +10 -6
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +15 -7
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +30 -14
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +8 -7
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +108 -87
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +15 -13
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +29 -14
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +40 -14
- model_compression_toolkit/core/keras/reader/node_builder.py +3 -3
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +25 -23
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +10 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +222 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py +131 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py +111 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +219 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py +131 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py +110 -0
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Dict, Any
+from typing import Dict, Any, Tuple
 
 import numpy as np
 
@@ -34,7 +34,7 @@ def get_weights_qparams(weights_attr_values: np.ndarray,
                         output_channels_axis: int,
                         node=None,
                         hessian_info_service: HessianInfoService = None,
-                        num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Dict[Any, Any]:
+                        num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Tuple[Dict[Any, Any], int]:
     """
     Compute thresholds to quantize a kernel according to a NodeWeightsQuantizationConfig
     instance.
@@ -50,22 +50,24 @@ def get_weights_qparams(weights_attr_values: np.ndarray,
 
     Returns:
         A dictionary with the quantization threshold of the kernel.
+        Selected quantization channel axis.
     """
     if attr_quant_config.weights_quantization_params_fn is not None:
-        weights_params = attr_quant_config.weights_quantization_params_fn(
-
-
-
-
-
-
-
-
-
+        weights_params, output_channels_axis = attr_quant_config.weights_quantization_params_fn(
+            weights_attr_values,
+            p=attr_quant_config.l_p_value,
+            n_bits=attr_quant_config.weights_n_bits,
+            per_channel=attr_quant_config.weights_per_channel_threshold,
+            channel_axis=output_channels_axis,
+            min_threshold=weights_quant_config.min_threshold,
+            quant_error_method=attr_quant_config.weights_error_method,
+            node=node,
+            hessian_info_service=hessian_info_service,
+            num_hessian_samples=num_hessian_samples)
     else:
         weights_params = {}
 
-    return weights_params
+    return weights_params, output_channels_axis
 
 
 def _get_kernel_channels_mapping(fw_info:FrameworkInfo,
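The visible effect of these hunks: get_weights_qparams, and every weights_quantization_params_fn it dispatches to, now returns a (params dict, selected channel axis) pair instead of just the dict, so callers learn which axis the search settled on when channel_axis is passed as None. A minimal sketch of a params function conforming to the new contract (the function and its max-abs threshold rule are illustrative stand-ins, not MCT code):

import numpy as np

def toy_symmetric_params_fn(tensor: np.ndarray, n_bits: int = 8, channel_axis: int = None):
    # Stand-in for a weights_quantization_params_fn under the new contract:
    # return the quantization params *and* the channel axis actually used.
    if channel_axis is None:
        channel_axis = 0  # the real code searches for the lowest-error axis
    reduce_axes = tuple(i for i in range(tensor.ndim) if i != channel_axis)
    threshold = np.max(np.abs(tensor), axis=reduce_axes)
    return {'threshold': threshold}, channel_axis

params, axis = toy_symmetric_params_fn(np.random.randn(8, 3, 3, 16))
print(params['threshold'].shape, axis)  # (8,) 0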
model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import numpy as np
+from typing import Union, Tuple, Dict
 
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
 from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
@@ -25,6 +26,8 @@ from model_compression_toolkit.core.common.quantization.quantization_params_gene
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import \
     get_tensor_max
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
+from model_compression_toolkit.core.common.similarity_analyzer import compute_mse
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import quantize_tensor
 
 
 def symmetric_selection_tensor(tensor_data: np.ndarray,
@@ -37,7 +40,8 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
                                quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
                                node=None,
                                hessian_info_service: HessianInfoService = None,
-                               num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES
+                               num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
+                               ) -> Tuple[Dict[str, np.ndarray], int]:
     """
     Compute the optimal threshold based on the provided QuantizationErrorMethod to quantize the tensor.
     Different search is applied, depends on the value of the selected QuantizationErrorMethod.
@@ -47,7 +51,7 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
         p: p-norm to use for the Lp-norm distance.
         n_bits: Number of bits to quantize the tensor.
         per_channel: Whether the quantization should be per-channel or not.
-        channel_axis: Output channel index.
+        channel_axis: Output channel index. if None, search for best axis.
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the parameters' selection accordingly.
@@ -57,12 +61,24 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
 
     Returns:
         Optimal threshold to quantize the tensor in a symmetric manner.
+        Selected quantization channel axis.
     """
 
-    tensor_max = get_tensor_max(tensor_data, per_channel, channel_axis, n_bits)
-
     if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
-
+        if channel_axis is None and per_channel:
+            total_error_list = []
+            th_list = []
+            for _axis in range(len(tensor_data.shape)):
+                tensor_max = get_tensor_max(tensor_data, per_channel, _axis, n_bits)
+                threshold = get_init_threshold(min_threshold, tensor_max, per_channel)
+                q_tensor_data = quantize_tensor(tensor_data, threshold, n_bits, True)
+                total_error_list.append(compute_mse(tensor_data, q_tensor_data, norm=True))
+                th_list.append(threshold)
+            channel_axis = np.argmin(total_error_list)
+            threshold = th_list[channel_axis]
+        else:
+            tensor_max = get_tensor_max(tensor_data, per_channel, channel_axis, n_bits)
+            threshold = get_init_threshold(min_threshold, tensor_max, per_channel)
    else:
         signed = True  # weights are always signed
         axis = -1 if per_channel else None
@@ -71,15 +87,14 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
                                                                signed=signed, node=node,
                                                                hessian_info_service=hessian_info_service,
                                                                num_hessian_samples=num_hessian_samples)
-        threshold = qparams_symmetric_selection_tensor_search(error_function,
-
-
-
-
-
-
-
-    return {THRESHOLD: threshold}
+        threshold, channel_axis = qparams_symmetric_selection_tensor_search(error_function,
+                                                                            tensor_data,
+                                                                            n_bits,
+                                                                            per_channel,
+                                                                            channel_axis,
+                                                                            min_threshold=min_threshold,
+                                                                            signed=signed)
+    return {THRESHOLD: threshold}, channel_axis
 
 
 def symmetric_selection_histogram(bins: np.ndarray,
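The substantive addition is in the NOCLIPPING branch: when channel_axis is None and per-channel quantization is requested, the code now tries every tensor axis as the channel axis, quantizes with the no-clipping (max-based) threshold, and keeps the axis with the lowest normalized MSE. A self-contained sketch of that search, with a simplified signed symmetric quantizer standing in for MCT's get_tensor_max/quantize_tensor (which additionally enforce a min_threshold floor):

import numpy as np

def _quantize_symmetric(x, threshold, n_bits):
    # Simplified signed symmetric quantizer (stand-in, not MCT's quantize_tensor).
    delta = threshold / (2 ** (n_bits - 1))
    return np.clip(np.round(x / delta), -2 ** (n_bits - 1), 2 ** (n_bits - 1) - 1) * delta

def choose_axis_by_mse(tensor, n_bits=8):
    # Mirror of the new NOCLIPPING branch: try every axis as the channel axis,
    # quantize with per-channel max-abs thresholds, keep the lowest-MSE axis.
    errors, thresholds = [], []
    for axis in range(tensor.ndim):
        reduce_axes = tuple(i for i in range(tensor.ndim) if i != axis)
        th = np.max(np.abs(tensor), axis=reduce_axes, keepdims=True)
        q = _quantize_symmetric(tensor, th, n_bits)
        errors.append(np.mean((tensor - q) ** 2))
        thresholds.append(th)
    best = int(np.argmin(errors))
    return thresholds[best], best

th, axis = choose_axis_by_mse(np.random.randn(16, 3, 3, 32))
print(axis, th.shape)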
model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import numpy as np
+from typing import Union, Tuple, Dict
 
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
 from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES
@@ -24,6 +25,9 @@ from model_compression_toolkit.core.common.quantization.quantization_params_gene
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import get_tensor_max, \
     get_tensor_min
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
+from model_compression_toolkit.core.common.similarity_analyzer import compute_mse
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import uniform_quantize_tensor
+
 
 def uniform_selection_tensor(tensor_data: np.ndarray,
                              p: int,
@@ -35,7 +39,8 @@ def uniform_selection_tensor(tensor_data: np.ndarray,
                              quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
                              node=None,
                              hessian_info_service: HessianInfoService = None,
-                             num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES
+                             num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
+                             ) -> Tuple[Dict[str, np.ndarray], int]:
     """
     Compute the optimal quantization range based on the provided QuantizationErrorMethod
     to uniformly quantize the tensor.
@@ -46,7 +51,7 @@ def uniform_selection_tensor(tensor_data: np.ndarray,
         p: p-norm to use for the Lp-norm distance.
         n_bits: Number of bits to quantize the tensor.
         per_channel: Whether the quantization should be per-channel or not.
-        channel_axis: Output channel index.
+        channel_axis: Output channel index. if None, search for best axis.
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the range parameters' selection accordingly.
@@ -56,27 +61,48 @@ def uniform_selection_tensor(tensor_data: np.ndarray,
 
     Returns:
         Optimal quantization range to quantize the tensor uniformly.
+        Selected quantization channel axis.
     """
-    tensor_min = get_tensor_min(tensor_data, per_channel, channel_axis)
-    tensor_max = get_tensor_max(tensor_data, per_channel, channel_axis, n_bits, is_uniform_quantization=True)
-
     if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
-
+        if channel_axis is None and per_channel:
+            total_error_list = []
+            th_list = []
+            for _axis in range(len(tensor_data.shape)):
+                tensor_min = get_tensor_min(tensor_data, per_channel, _axis)
+                tensor_max = get_tensor_max(tensor_data, per_channel, _axis, n_bits, is_uniform_quantization=True)
+                q_tensor_data = uniform_quantize_tensor(tensor_data, tensor_min, tensor_max, n_bits)
+                total_error_list.append(compute_mse(tensor_data, q_tensor_data, norm=True))
+                th_list.append((tensor_min, tensor_max))
+            channel_axis = np.argmin(total_error_list)
+            mm = th_list[channel_axis]
+        else:
+            tensor_min = get_tensor_min(tensor_data, per_channel, channel_axis)
+            tensor_max = get_tensor_max(tensor_data, per_channel, channel_axis, n_bits, is_uniform_quantization=True)
+            mm = tensor_min, tensor_max
     else:
         axis = -1 if per_channel else None
         error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.UNIFORM, quant_error_method,
                                                                        p, axis=axis, norm=False, node=node,
                                                                        hessian_info_service=hessian_info_service,
                                                                        num_hessian_samples=num_hessian_samples)
-        mm = qparams_uniform_selection_tensor_search(error_function,
-
-
-
-
-
-
+        mm, channel_axis = qparams_uniform_selection_tensor_search(error_function,
+                                                                   tensor_data,
+                                                                   n_bits,
+                                                                   per_channel,
+                                                                   channel_axis)
+        # In case the tensor\axis has a single value, then min==max, so need to adjust either min or max to zero.
+        if not isinstance(mm[0], np.ndarray):
+            if mm[0] > 0:
+                mm = (np.float32(0).astype(mm[0].dtype), mm[1])
+            if mm[1] < 0:
+                mm = (mm[0], np.float32(0).astype(mm[1].dtype))
+        else:
+            adj_min_to_zero = np.logical_and(mm[1] == mm[0], mm[0] > 0)
+            adj_max_to_zero = np.logical_and(mm[1] == mm[0], mm[1] < 0)
+            mm[0][adj_min_to_zero] = 0
+            mm[1][adj_max_to_zero] = 0
     return {RANGE_MIN: mm[0],
-            RANGE_MAX: mm[1]}
+            RANGE_MAX: mm[1]}, channel_axis
 
 
 def uniform_selection_histogram(bins: np.ndarray,
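Besides returning the chosen axis, the uniform path gains a fix-up for degenerate ranges: a uniform grid must be able to represent zero, so when a channel holds a single repeated value (min == max) that sits entirely above or below zero, the range is widened to touch zero. The per-channel branch of that fix-up, reproduced on toy data:

import numpy as np

# Same logic as the new adj_min_to_zero / adj_max_to_zero lines in the diff.
range_min = np.array([0.5, -1.0, -0.3], dtype=np.float32)
range_max = np.array([0.5, -1.0,  0.7], dtype=np.float32)

adj_min_to_zero = np.logical_and(range_max == range_min, range_min > 0)
adj_max_to_zero = np.logical_and(range_max == range_min, range_max < 0)
range_min[adj_min_to_zero] = 0   # channel 0: [0.5, 0.5]   -> [0.0, 0.5]
range_max[adj_max_to_zero] = 0   # channel 1: [-1.0, -1.0] -> [-1.0, 0.0]
print(range_min, range_max)      # channel 2 already spans zero, unchanged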
model_compression_toolkit/core/keras/reader/node_builder.py

@@ -158,7 +158,8 @@ def build_node(node: KerasNode,
         if is_const(arg) or (
                 keras_layer.symbol in tf_function_symbols and
                 isinstance(arg, (tuple, list))):
-
+            if inputs_as_list or i in kwarg2index.values():
+                weights.update({i: to_numpy(arg, is_single_tensor=True)})
     # remove weights and KerasTensors and weights from op_call_args
     if inputs_as_list:
         op_call_args = tuple(op_call_args[1:])
@@ -169,8 +170,7 @@ def build_node(node: KerasNode,
     # read weights from call kwargs
     weight_keys = []
     for k, v in op_call_kwargs.items():
-        if is_const(v) or (keras_layer.
-                tf.matmul] and
+        if is_const(v) or (keras_layer.symbol in tf_function_symbols and
                 isinstance(v, (tuple, list))):
             if k in kwarg2index:
                 weights.update({kwarg2index[k]: to_numpy(v, is_single_tensor=True)})
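The first hunk tightens when a positional constant argument is recorded as a weight: only when the op takes its inputs as a list, or when the argument's position corresponds to a known keyword argument. A toy illustration of the gate (the kwarg2index mapping here is invented for the example):

kwarg2index = {'x': 0, 'y': 1}  # hypothetical positional indices of the op's kwargs
inputs_as_list = False

for i, arg in enumerate([1.5, 2.5, 3.5]):
    if inputs_as_list or i in kwarg2index.values():
        print(f'capture arg {i} as positional weight')  # args 0 and 1 only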
model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py

@@ -39,20 +39,16 @@ from mct_quantizers import PytorchQuantizationWrapper
 def _build_input_tensors_list(node: BaseNode,
                               graph: Graph,
                               inputs: Tuple[Any],
-                              node_to_output_tensors_dict: Dict[BaseNode, List]
-                              is_op_quantize_wrapper: bool) -> List[List]:
+                              node_to_output_tensors_dict: Dict[BaseNode, List]) -> List[List]:
     """
     Given a node, build a list of input tensors the node gets. The list is built based on the
-    node's incoming edges, previous nodes' output tensors
-    Positional weights aren't used if the node's op is PytorchQuantizationWrapper, since it's
-    positional weights are already in the wrapper.
+    node's incoming edges, previous nodes' output tensors.
 
     Args:
         node: Node to build its input tensors list.
         graph: Graph the node is in.
         inputs: list of input tensors to model.
         node_to_output_tensors_dict: A dictionary from a node to its output tensors.
-        is_op_quantize_wrapper: Whether the func_op is a PytorchQuantizationWrapper or not.
 
     Returns:
         A list of the node's input tensors.
@@ -67,35 +63,30 @@ def _build_input_tensors_list(node: BaseNode,
             _input_tensors = node_to_output_tensors_dict[ie.source_node]
             input_tensors.append(_input_tensors)
     input_tensors = [tensor for tensor_list in input_tensors for tensor in tensor_list]  # flat list of lists
-    input_tensors = node.insert_positional_weights_to_input_list(input_tensors)
-    # convert inputs from positional weights (numpy arrays) to tensors. Must handle each element in the
-    # list separately, because in FX the tensors are FX objects and fail to_torch_tensor
-    input_tensors = [to_torch_tensor(t, numpy_type=t.dtype) if isinstance(t, np.ndarray) else t
-                     for t in input_tensors]
     return input_tensors
 
 
 def _merge_inputs(_node: BaseNode, input_tensors: List, op_call_args: List,
-
+                  tensor_input_indices: List = None) -> List:
     """
-    Merge input tensors list with op_call_args, according to correct order.
+    Merge input tensors list with positional weights and op_call_args, according to correct order.
 
     Args:
         _node: The node the inputs are for.
         input_tensors: activation input tensors to node.
         op_call_args: framework node call args.
-
+
     Returns:
         Combined list of input_tensors and op_call_args.
     """
     if isinstance(_node, FunctionalNode) and _node.tensor_input_indices:
         _input_list = op_call_args.copy()
-        if
-
-
-
-
-
+        if tensor_input_indices is None:
+            tensor_input_indices = _node.tensor_input_indices
+        assert len(tensor_input_indices) == len(input_tensors), \
+            f'Mismatch between input tensors ({len(tensor_input_indices)}) and indices {len(input_tensors)}'
+        for i, t in zip(tensor_input_indices, input_tensors):
+            _input_list.insert(i, t)
     else:
         _input_list = input_tensors + op_call_args
 
@@ -126,10 +117,22 @@ def _run_operation(n: BaseNode,
     op_call_args = n.op_call_args if isinstance(n, FunctionalNode) else []
     functional_kwargs = n.op_call_kwargs if isinstance(n, FunctionalNode) else {}
 
+    if not (isinstance(n, FunctionalNode) and isinstance(op_func, PytorchQuantizationWrapper)):
+        # Insert positional weights only when not a quantized functional node, because quantized functional nodes
+        # insert the quantized weights in the wrapper.
+        input_tensors = n.insert_positional_weights_to_input_list(input_tensors)
+        # convert inputs from positional weights (numpy arrays) to tensors. Must handle each element in the
+        # list separately, because in FX the tensors are FX objects and fail to_torch_tensor
+        input_tensors = [to_torch_tensor(t, numpy_type=t.dtype) if isinstance(t, np.ndarray) else t
+                         for t in input_tensors]
+        _tensor_input_indices = None
+    else:
+        _tensor_input_indices = [i for i in n.tensor_input_indices if i not in n.weights]
+
     if isinstance(n, FunctionalNode) and n.inputs_as_list:
         out_tensors_of_n_float = op_func(input_tensors, *op_call_args, **functional_kwargs)
     else:
-        merged_inputs = _merge_inputs(n, input_tensors, op_call_args,
+        merged_inputs = _merge_inputs(n, input_tensors, op_call_args, tensor_input_indices=_tensor_input_indices)
         out_tensors_of_n_float = op_func(*merged_inputs, **functional_kwargs)
 
     # Add a fake quant node if the node has an activation threshold.
@@ -295,8 +298,7 @@ class PytorchModel(torch.nn.Module):
                 input_tensors = _build_input_tensors_list(node,
                                                           self.graph,
                                                           args,
-                                                          node_to_output_tensors_dict
-                                                          isinstance(op_func, PytorchQuantizationWrapper))
+                                                          node_to_output_tensors_dict)
                 use_activation_quantization, activation_quantization_fn = self._get_activation_quantization_fn(node)
 
                 # Run node operation and fetch outputs
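The restructuring moves positional-weight handling out of _build_input_tensors_list and into _run_operation, where wrapped (already-quantized) functional nodes skip it. The merge step itself is simple: each remaining input tensor is inserted back into the op's call args at its recorded index. A minimal standalone version of that logic:

# Minimal sketch of the merge logic used by _merge_inputs in the diff.
def merge_inputs(op_call_args, input_tensors, tensor_input_indices):
    merged = list(op_call_args)
    assert len(tensor_input_indices) == len(input_tensors)
    for i, t in zip(tensor_input_indices, input_tensors):
        merged.insert(i, t)
    return merged

# e.g. torch.add(x, 2.0): the tensor goes back to position 0, the constant stays.
print(merge_inputs([2.0], ['x_tensor'], [0]))  # ['x_tensor', 2.0]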
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py

@@ -27,6 +27,8 @@ if FOUND_TF:
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1_pot.tpc_keras import get_keras_tpc as get_keras_tpc_v1_pot
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_keras import get_keras_tpc as get_keras_tpc_v2
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2_lut.tpc_keras import get_keras_tpc as get_keras_tpc_v2_lut
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3.tpc_keras import get_keras_tpc as get_keras_tpc_v3
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3_lut.tpc_keras import get_keras_tpc as get_keras_tpc_v3_lut
 
     # Keras: TPC versioning
     keras_tpc_models_dict = {'v1': get_keras_tpc_v1,
@@ -34,6 +36,8 @@ if FOUND_TF:
                              'v1_pot': get_keras_tpc_v1_pot,
                              'v2': get_keras_tpc_v2,
                              'v2_lut': get_keras_tpc_v2_lut,
+                             'v3': get_keras_tpc_v3,
+                             'v3_lut': get_keras_tpc_v3_lut,
                              LATEST: get_keras_tpc_latest}
 
 ###############################
@@ -52,6 +56,10 @@ if FOUND_TORCH:
         get_pytorch_tpc as get_pytorch_tpc_v2
     from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2_lut.tpc_pytorch import \
         get_pytorch_tpc as get_pytorch_tpc_v2_lut
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3.tpc_pytorch import \
+        get_pytorch_tpc as get_pytorch_tpc_v3
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3_lut.tpc_pytorch import \
+        get_pytorch_tpc as get_pytorch_tpc_v3_lut
 
     # Pytorch: TPC versioning
     pytorch_tpc_models_dict = {'v1': get_pytorch_tpc_v1,
@@ -59,6 +67,8 @@ if FOUND_TORCH:
                                'v1_pot': get_pytorch_tpc_v1_pot,
                                'v2': get_pytorch_tpc_v2,
                                'v2_lut': get_pytorch_tpc_v2_lut,
+                               'v3': get_pytorch_tpc_v3,
+                               'v3_lut': get_pytorch_tpc_v3_lut,
                                LATEST: get_pytorch_tpc_latest}
 
 tpc_dict = {TENSORFLOW: keras_tpc_models_dict,
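With the registry entries above, the new versions become selectable through MCT's usual TPC lookup. A sketch, assuming the standard get_target_platform_capabilities entry point and the 'imx500' target name used elsewhere in MCT:

import model_compression_toolkit as mct

# 'v3' / 'v3_lut' are the version strings registered by this change.
tpc_keras_v3 = mct.get_target_platform_capabilities('tensorflow', 'imx500',
                                                    target_platform_version='v3')
tpc_torch_v3_lut = mct.get_target_platform_capabilities('pytorch', 'imx500',
                                                        target_platform_version='v3_lut')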
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/__init__.py (new file)

@@ -0,0 +1,16 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+__version__ = 'v3'
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py (new file)

@@ -0,0 +1,222 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import List, Tuple
+
+import model_compression_toolkit as mct
+from model_compression_toolkit.constants import FLOAT_BITWIDTH
+from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR, BIAS_ATTR, WEIGHTS_N_BITS
+from model_compression_toolkit.target_platform_capabilities.target_platform import OpQuantizationConfig, \
+    TargetPlatformModel
+from model_compression_toolkit.target_platform_capabilities.target_platform.op_quantization_config import \
+    AttributeQuantizationConfig
+
+tp = mct.target_platform
+
+
+def get_tp_model() -> TargetPlatformModel:
+    """
+    A method that generates a default target platform model, with base 8-bit quantization configuration and 8, 4, 2
+    bits configuration list for mixed-precision quantization.
+    NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
+    (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
+    'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
+    This version enables metadata by default.
+
+    Returns: A TargetPlatformModel object.
+
+    """
+    base_config, mixed_precision_cfg_list, default_config = get_op_quantization_configs()
+    return generate_tp_model(default_config=default_config,
+                             base_config=base_config,
+                             mixed_precision_cfg_list=mixed_precision_cfg_list,
+                             name='imx500_tp_model')
+
+
+def get_op_quantization_configs() -> \
+        Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+    """
+    Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
+    In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
+    default configuration for mixed-precision quantization.
+
+    Returns: An OpQuantizationConfig config object and a list of OpQuantizationConfig objects.
+
+    """
+
+    # TODO: currently, we don't want to quantize any attribute but the kernel by default,
+    # to preserve the current behavior of MCT, so quantization is disabled for all other attributes.
+    # Other quantization parameters are set to what we eventually want to quantize by default
+    # when we enable multi-attributes quantization - THIS NEED TO BE MODIFIED IN ALL TP MODELS!
+
+    # define a default quantization config for all non-specified weights attributes.
+    default_weight_attr_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=8,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,  # TODO: this will changed to True once implementing multi-attributes quantization
+        lut_values_bitwidth=None)
+
+    # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
+    kernel_base_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+        weights_n_bits=8,
+        weights_per_channel_threshold=True,
+        enable_weights_quantization=True,
+        lut_values_bitwidth=None)
+
+    # define a quantization config to quantize the bias (for layers where there is a bias attribute).
+    bias_config = AttributeQuantizationConfig(
+        weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        weights_n_bits=FLOAT_BITWIDTH,
+        weights_per_channel_threshold=False,
+        enable_weights_quantization=False,
+        lut_values_bitwidth=None)
+
+    # Create a quantization config.
+    # A quantization configuration defines how an operator
+    # should be quantized on the modeled hardware:
+
+    # We define a default config for operation without kernel attribute.
+    # This is the default config that should be used for non-linear operations.
+    eight_bits_default = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # We define an 8-bit config for linear operations quantization, that include a kernel and bias attributes.
+    linear_eight_bits = tp.OpQuantizationConfig(
+        default_weight_attr_config=default_weight_attr_config,
+        attr_weights_configs_mapping={KERNEL_ATTR: kernel_base_config, BIAS_ATTR: bias_config},
+        activation_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32)
+
+    # To quantize a model using mixed-precision, create
+    # a list with more than one OpQuantizationConfig.
+    # In this example, we quantize some operations' weights
+    # using 2, 4 or 8 bits, and when using 2 or 4 bits, it's possible
+    # to quantize the operations' activations using LUT.
+    four_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 4}},
+                                                 simd_size=linear_eight_bits.simd_size * 2)
+    two_bits = linear_eight_bits.clone_and_edit(attr_to_edit={KERNEL_ATTR: {WEIGHTS_N_BITS: 2}},
+                                                simd_size=linear_eight_bits.simd_size * 4)
+
+    mixed_precision_cfg_list = [linear_eight_bits, four_bits, two_bits]
+
+    return linear_eight_bits, mixed_precision_cfg_list, eight_bits_default
+
+
+def generate_tp_model(default_config: OpQuantizationConfig,
+                      base_config: OpQuantizationConfig,
+                      mixed_precision_cfg_list: List[OpQuantizationConfig],
+                      name: str) -> TargetPlatformModel:
+    """
+    Generates TargetPlatformModel with default defined Operators Sets, based on the given base configuration and
+    mixed-precision configurations options list.
+
+    Args
+        default_config: A default OpQuantizationConfig to set as the TP model default configuration.
+        base_config: An OpQuantizationConfig to set as the TargetPlatformModel base configuration for mixed-precision purposes only.
+        mixed_precision_cfg_list: A list of OpQuantizationConfig to be used as the TP model mixed-precision
+            quantization configuration options.
+        name: The name of the TargetPlatformModel.
+
+    Returns: A TargetPlatformModel object.
+
+    """
+    # Create a QuantizationConfigOptions, which defines a set
+    # of possible configurations to consider when quantizing a set of operations (in mixed-precision, for example).
+    # If the QuantizationConfigOptions contains only one configuration,
+    # this configuration will be used for the operation quantization:
+    default_configuration_options = tp.QuantizationConfigOptions([default_config])
+
+    # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+    # Constant configuration is similar to the default eight bit configuration except for PoT
+    # quantization method for the constant.
+    # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+    # define the desired quantization properties for them.
+    const_config = default_config.clone_and_edit(
+        default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+            enable_weights_quantization=True, weights_per_channel_threshold=True,
+            weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO))
+    const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
+    # Create a TargetPlatformModel and set its default quantization config.
+    # This default configuration will be used for all operations
+    # unless specified otherwise (see OperatorsSet, for example):
+    generated_tpm = tp.TargetPlatformModel(default_configuration_options, add_metadata=True, name=name)
+
+    # To start defining the model's components (such as operator sets, and fusing patterns),
+    # use 'with' the TargetPlatformModel instance, and create them as below:
+    with generated_tpm:
+        # Create an OperatorsSet to represent a set of operations.
+        # Each OperatorsSet has a unique label.
+        # If a quantization configuration options is passed, these options will
+        # be used for operations that will be attached to this set's label.
+        # Otherwise, it will be a configure-less set (used in fusing):
+
+        generated_tpm.set_simd_padding(is_simd_padding=True)
+
+        # May suit for operations like: Dropout, Reshape, etc.
+        default_qco = tp.get_default_quantization_config_options()
+        tp.OperatorsSet("NoQuantization",
+                        default_qco.clone_and_edit(enable_activation_quantization=False)
+                        .clone_and_edit_weight_attribute(enable_weights_quantization=False))
+
+        # Create Mixed-Precision quantization configuration options from the given list of OpQuantizationConfig objects
+        mixed_precision_configuration_options = tp.QuantizationConfigOptions(mixed_precision_cfg_list,
+                                                                             base_config=base_config)
+
+        # Define operator sets that use mixed_precision_configuration_options:
+        conv = tp.OperatorsSet("Conv", mixed_precision_configuration_options)
+        fc = tp.OperatorsSet("FullyConnected", mixed_precision_configuration_options)
+
+        # Define operations sets without quantization configuration
+        # options (useful for creating fusing patterns, for example):
+        any_relu = tp.OperatorsSet("AnyReLU")
+        add = tp.OperatorsSet("Add", const_configuration_options)
+        sub = tp.OperatorsSet("Sub", const_configuration_options)
+        mul = tp.OperatorsSet("Mul", const_configuration_options)
+        div = tp.OperatorsSet("Div", const_configuration_options)
+        prelu = tp.OperatorsSet("PReLU")
+        swish = tp.OperatorsSet("Swish")
+        sigmoid = tp.OperatorsSet("Sigmoid")
+        tanh = tp.OperatorsSet("Tanh")
+
+        # Combine multiple operators into a single operator to avoid quantization between
+        # them. To do this we define fusing patterns using the OperatorsSets that were created.
+        # To group multiple sets with regard to fusing, an OperatorSetConcat can be created
+        activations_after_conv_to_fuse = tp.OperatorSetConcat(any_relu, swish, prelu, sigmoid, tanh)
+        activations_after_fc_to_fuse = tp.OperatorSetConcat(any_relu, swish, sigmoid)
+        any_binary = tp.OperatorSetConcat(add, sub, mul, div)
+
+        # ------------------- #
+        # Fusions
+        # ------------------- #
+        tp.Fusing([conv, activations_after_conv_to_fuse])
+        tp.Fusing([fc, activations_after_fc_to_fuse])
+        tp.Fusing([any_binary, any_relu])
+
+    return generated_tpm
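A quick way to see what this TP model declares: in the mixed-precision list, halving the kernel bit-width doubles the SIMD size (8 bits at 32, 4 bits at 64, 2 bits at 128). A sketch that inspects the candidates, assuming a build that ships these new v3 files:

from model_compression_toolkit.target_platform_capabilities.constants import KERNEL_ATTR
from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v3.tp_model import \
    get_op_quantization_configs

base_config, mp_cfg_list, default_config = get_op_quantization_configs()
for cfg in mp_cfg_list:
    kernel_cfg = cfg.attr_weights_configs_mapping[KERNEL_ATTR]
    print(kernel_cfg.weights_n_bits, cfg.simd_size)  # expected: 8 32, 4 64, 2 128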