mct-nightly 1.9.0.20230813.post401__py3-none-any.whl → 1.9.0.20230814.post352__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/METADATA +1 -1
- {mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/RECORD +16 -16
- model_compression_toolkit/constants.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/kmeans_params.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +11 -11
- model_compression_toolkit/core/common/quantization/quantizers/kmeans_quantizer.py +4 -4
- model_compression_toolkit/core/common/quantization/quantizers/lut_kmeans_quantizer.py +9 -9
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +4 -4
- model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py +10 -10
- model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py +9 -9
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +3 -3
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +6 -6
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +1 -1
- {mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/WHEEL +0 -0
- {mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/top_level.txt +0 -0
{mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/RECORD
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
model_compression_toolkit/__init__.py,sha256=Jvow82lnEcGy7Wr1diXvcgWoWvIbOryYekqdMYVve4I,3608
|
|
2
|
-
model_compression_toolkit/constants.py,sha256=
|
|
2
|
+
model_compression_toolkit/constants.py,sha256=C_kTcbYhWv90JoZVu4kT21b7aHaLEmsA--VDFXbiAJs,3817
|
|
3
3
|
model_compression_toolkit/logger.py,sha256=b9DVktZ-LymFcRxv2aL_sdiE6S2sSrFGWltx6dgEuUY,4863
|
|
4
4
|
model_compression_toolkit/core/__init__.py,sha256=qnBA6aaojI7RpEQZU2vXWiELHfVJf-MnAP-4T0tcFDY,2008
|
|
5
5
|
model_compression_toolkit/core/analyzer.py,sha256=dbsD61pakp_9JXNyAScLdtJvcXny9jr_cMbET0Bd3Sg,2975
|
|
@@ -89,8 +89,8 @@ model_compression_toolkit/core/common/quantization/quantize_node.py,sha256=UK_Ys
|
|
|
89
89
|
model_compression_toolkit/core/common/quantization/set_node_quantization_config.py,sha256=KuYd3fHdTKK8Pg1hLw8zB1CpexyltJOpQMcKMvLJmB8,10683
|
|
90
90
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py,sha256=_U4IFPuzGyyAymjDjsPl2NF6UbFggqBaiA1Td3sug3I,1608
|
|
91
91
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py,sha256=rwCedE0zggamSBY50rqh-xqZpIMrn8o96YH_jMCuPrk,16505
|
|
92
|
-
model_compression_toolkit/core/common/quantization/quantization_params_generation/kmeans_params.py,sha256=
|
|
93
|
-
model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py,sha256=
|
|
92
|
+
model_compression_toolkit/core/common/quantization/quantization_params_generation/kmeans_params.py,sha256=qDfJbvY64KLOG6n18ddEPTFGrKHlaXzZ136TrVpgH9s,2917
|
|
93
|
+
model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py,sha256=V__fKVcr6eCJcr7nmAJS24hBW6Wj33d-pKWLa8KwL4A,7289
|
|
94
94
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/outlier_filter.py,sha256=9gnfJV89jpGwAx8ImJ5E9NjCv3lDtbyulP4OtgWb62M,1772
|
|
95
95
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py,sha256=W4j9IB1Grj_Ku1pLjPxb-HLcYU9LTDuf9_0JilbqU2w,8484
|
|
96
96
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py,sha256=noEdvGiyyW7acgQ2OFWLedCODibTGYJifC9qo8YIU5U,4558
|
|
@@ -100,9 +100,9 @@ model_compression_toolkit/core/common/quantization/quantization_params_generatio
|
|
|
100
100
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py,sha256=53OFL3IZxtH-UPakf3h_LZkaZAa3cgc2oqgMUe3Sg8o,9689
|
|
101
101
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py,sha256=oiJn1twYpTaq_z5qX4d8_nnk_jouYWHq8z0WAknl5oE,7879
|
|
102
102
|
model_compression_toolkit/core/common/quantization/quantizers/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
|
|
103
|
-
model_compression_toolkit/core/common/quantization/quantizers/kmeans_quantizer.py,sha256=
|
|
104
|
-
model_compression_toolkit/core/common/quantization/quantizers/lut_kmeans_quantizer.py,sha256=
|
|
105
|
-
model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py,sha256=
|
|
103
|
+
model_compression_toolkit/core/common/quantization/quantizers/kmeans_quantizer.py,sha256=ZS3IXGbUTW580vwVF5jgxfPVYVL3tQrpvoSqDxVu7zQ,2325
|
|
104
|
+
model_compression_toolkit/core/common/quantization/quantizers/lut_kmeans_quantizer.py,sha256=P0x_y18LypBxP2tV9OWizheYfILqvaMC8RwHo04sUpQ,2761
|
|
105
|
+
model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py,sha256=5JuPwb9HDHaYQj1YyNWGY7GdjJ105Yr8iEEZhzfuRW4,14190
|
|
106
106
|
model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py,sha256=FVeuK-LeuAsRFcqo5uaNHmb6oTOFs21ltghtqswl6KM,5486
|
|
107
107
|
model_compression_toolkit/core/common/statistics_correction/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
|
|
108
108
|
model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py,sha256=uUMgzZaGYf2rcaCQTIk2N0CaF5DZE5ZsHyAEXtSeCz0,3434
|
|
@@ -167,7 +167,7 @@ model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quanti
|
|
|
167
167
|
model_compression_toolkit/core/keras/quantizer/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
|
|
168
168
|
model_compression_toolkit/core/keras/quantizer/base_quantizer.py,sha256=eMRjAUU189-AVwNGMlV0M-ZlL48ZYmILzutheUT00xU,1628
|
|
169
169
|
model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py,sha256=Oi64CD83OopPoQNAarl2MJRbCKujU2W8Wdrs9KOPNWk,6151
|
|
170
|
-
model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py,sha256=
|
|
170
|
+
model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py,sha256=Up3-sbuAcaJ6kfe7Sz3XN6iiJ9hlxzOMncLCFEXJFjk,4475
|
|
171
171
|
model_compression_toolkit/core/keras/reader/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
|
|
172
172
|
model_compression_toolkit/core/keras/reader/common.py,sha256=ScJaCiM8FuisN3sLbVsMQiIzkFlzr-gYEeL8M5lkoew,2627
|
|
173
173
|
model_compression_toolkit/core/keras/reader/connectivity_handler.py,sha256=83Abmg-AILnN6NHJexI226ZaHEblLOX3H9sH6vXFQhA,11418
|
|
@@ -221,7 +221,7 @@ model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_q
|
|
|
221
221
|
model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py,sha256=VUN9vvWQWAh281C0xgV3w4T2DkSaxFZ-xmBgF50vGdo,5961
|
|
222
222
|
model_compression_toolkit/core/pytorch/quantizer/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
|
223
223
|
model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py,sha256=rox-f5wbRyxU1UHeHyaoIDXB9r9fCXm1dPN4FVwHqTc,6464
|
|
224
|
-
model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py,sha256=
|
|
224
|
+
model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py,sha256=uyeBtNokyDUikk-YkDP_mN_2DX0J5oPm3kSfdSUT2Ck,4420
|
|
225
225
|
model_compression_toolkit/core/pytorch/reader/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
|
226
226
|
model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=bZxt288NjFD_VzsguYRfiGSkSekUT6vX5MhGwYywzHY,12113
|
|
227
227
|
model_compression_toolkit/core/pytorch/reader/node_holders.py,sha256=dMvTTs1DER6TJ0FiSEmSdLa27WKpvP2Hz-kILUcnnIA,1789
|
|
@@ -250,13 +250,13 @@ model_compression_toolkit/exporter/model_wrapper/keras/__init__.py,sha256=cco4Tm
|
|
|
250
250
|
model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py,sha256=XMXcszmMSEgeIc1tCNZqWjsyFt9ZmcgfWZQXDqnDMjM,3509
|
|
251
251
|
model_compression_toolkit/exporter/model_wrapper/keras/builder/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
|
252
252
|
model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py,sha256=P_701NG1OYwyxgbtPSjb-OYseDeiHjye8m9wjpAl6es,4253
|
|
253
|
-
model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py,sha256=
|
|
253
|
+
model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py,sha256=VFX8rvlaGTK-k-3g0ayAL_qJJR_6wtS54tn9RzxKev4,8755
|
|
254
254
|
model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizers.py,sha256=n7VTA-a9TrLFpfdYAqrAKj6PGlAyLq8-xdwnMMpX71k,2077
|
|
255
255
|
model_compression_toolkit/exporter/model_wrapper/pytorch/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
|
256
256
|
model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py,sha256=gvX5ILs5vjQ_F_dq5KaFs0GOQEq9gYXO5a6YZlYY8h4,3449
|
|
257
257
|
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
|
258
258
|
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py,sha256=SJ5fetbUMkmB0tkHkmVhMrLksh7eqMQJLFuMD08ZKWM,3921
|
|
259
|
-
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py,sha256=
|
|
259
|
+
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py,sha256=66QXUQI1yvov9QXCW1s8LLN9N7gFwvVrPJDfzHayWZM,8635
|
|
260
260
|
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizers.py,sha256=hinP-wtyxZyoW860GdJAk6M3iPjmwwPXQTUxd56yhq8,2086
|
|
261
261
|
model_compression_toolkit/gptq/__init__.py,sha256=2xos6AJziEy-eK91XtIJlunf8LhK4OayU7d6CQvXWsw,1276
|
|
262
262
|
model_compression_toolkit/gptq/runner.py,sha256=vWd7cWKgTGc9oPcTtwTQZoI3MArCx19Y61uteLFCxVo,5534
|
|
@@ -336,7 +336,7 @@ model_compression_toolkit/target_platform_capabilities/immutable.py,sha256=rSPd3
|
|
|
336
336
|
model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py,sha256=_LzyDupsTDiJvIsVA-L-M_fRrW8ePcul8mr60L8DW9g,1574
|
|
337
337
|
model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py,sha256=5Bu5MkOYYDGzZgTu-PBQ4xVCnso1mtssc9zz1pZjl7o,2010
|
|
338
338
|
model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py,sha256=NIKUE2AtRv4CFOhpwjVvfG3rLfvd6p7DYBSuK0SKo4s,2353
|
|
339
|
-
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py,sha256=
|
|
339
|
+
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py,sha256=lkiIhVdLML6FY3F_S-v01ZDa9lUUxzNl1rbu0f-6H2w,8540
|
|
340
340
|
model_compression_toolkit/target_platform_capabilities/target_platform/operators.py,sha256=rRmrmPBY4rxCWVpEc6FxeOPUFh8MkfwgQsqD82U9a7w,3108
|
|
341
341
|
model_compression_toolkit/target_platform_capabilities/target_platform/quantization_format.py,sha256=3UIZtGTV0WX3dbfiIMUFWID5W68vtKfiVoPWUbpQFzM,787
|
|
342
342
|
model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py,sha256=dFauUrY7BejPDVX8HcSotoHKcT7S9kk65jgzZdPis2E,9206
|
|
@@ -421,8 +421,8 @@ model_compression_toolkit/trainable_infrastructure/keras/load_model.py,sha256=Dw
|
|
|
421
421
|
model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
|
|
422
422
|
model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
|
423
423
|
model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=SbvRlIdE32PEBsINt1bhSqvrKL_zbM9V-aeSkOn-sw4,3083
|
|
424
|
-
mct_nightly-1.9.0.
|
|
425
|
-
mct_nightly-1.9.0.
|
|
426
|
-
mct_nightly-1.9.0.
|
|
427
|
-
mct_nightly-1.9.0.
|
|
428
|
-
mct_nightly-1.9.0.
|
|
424
|
+
mct_nightly-1.9.0.20230814.post352.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
|
425
|
+
mct_nightly-1.9.0.20230814.post352.dist-info/METADATA,sha256=NqhVoJcRgeb3DDNnTzIr4lTEk7fqgP4uMShzIt9b2dg,10750
|
|
426
|
+
mct_nightly-1.9.0.20230814.post352.dist-info/WHEEL,sha256=5sUXSg9e4bi7lTLOHcm6QEYwO5TIF1TNbTSVFVjcJcc,92
|
|
427
|
+
mct_nightly-1.9.0.20230814.post352.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
|
428
|
+
mct_nightly-1.9.0.20230814.post352.dist-info/RECORD,,
|
|
@@ -27,13 +27,13 @@ WEIGHTS_SIGNED = True
|
|
|
27
27
|
# Minimal threshold to use for quantization ranges:
|
|
28
28
|
MIN_THRESHOLD = (2 ** -16)
|
|
29
29
|
EPS = 1e-8
|
|
30
|
-
|
|
30
|
+
LUT_VALUES_BITWIDTH = 8
|
|
31
31
|
|
|
32
32
|
# Quantization attributes:
|
|
33
33
|
OUTPUT_SCALE = 'output_scale'
|
|
34
34
|
THRESHOLD = 'threshold'
|
|
35
35
|
SIGNED = 'is_signed'
|
|
36
|
-
|
|
36
|
+
LUT_VALUES = 'lut_values'
|
|
37
37
|
SCALE_PER_CHANNEL = 'scale_per_channel'
|
|
38
38
|
RANGE_MIN = 'range_min'
|
|
39
39
|
RANGE_MAX = 'range_max'
|
model_compression_toolkit/core/common/quantization/quantization_params_generation/kmeans_params.py
CHANGED
|
@@ -17,7 +17,7 @@ import numpy as np
|
|
|
17
17
|
from sklearn.cluster import KMeans
|
|
18
18
|
|
|
19
19
|
import model_compression_toolkit.core.common.quantization.quantization_config as qc
|
|
20
|
-
from model_compression_toolkit.constants import
|
|
20
|
+
from model_compression_toolkit.constants import LUT_VALUES, SCALE_PER_CHANNEL, MIN_THRESHOLD, EPS
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def kmeans_tensor(tensor_data: np.ndarray,
|
|
@@ -59,6 +59,6 @@ def kmeans_tensor(tensor_data: np.ndarray,
|
|
|
59
59
|
tensor_for_kmeans = (tensor_data / (scales_per_channel + EPS))
|
|
60
60
|
kmeans.fit(tensor_for_kmeans.reshape(-1, 1))
|
|
61
61
|
|
|
62
|
-
return {
|
|
62
|
+
return {LUT_VALUES: kmeans.cluster_centers_,
|
|
63
63
|
SCALE_PER_CHANNEL: scales_per_channel,
|
|
64
64
|
}
|
|
@@ -17,8 +17,8 @@ import numpy as np
|
|
|
17
17
|
from sklearn.cluster import KMeans
|
|
18
18
|
|
|
19
19
|
import model_compression_toolkit.core.common.quantization.quantization_config as qc
|
|
20
|
-
from model_compression_toolkit.constants import
|
|
21
|
-
|
|
20
|
+
from model_compression_toolkit.constants import LUT_VALUES, MIN_THRESHOLD, SCALE_PER_CHANNEL, \
|
|
21
|
+
LUT_VALUES_BITWIDTH, THRESHOLD
|
|
22
22
|
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import \
|
|
23
23
|
max_power_of_two, int_quantization_with_threshold
|
|
24
24
|
from model_compression_toolkit.core.common.quantization.quantization_params_generation.symmetric_selection import \
|
|
@@ -41,7 +41,7 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
|
|
|
41
41
|
"""
|
|
42
42
|
The quantizer first finds the closest max value per channel of tensor_data.
|
|
43
43
|
Now, we divide tensor_data with the threshold vector per channel. In addition, we scale the result to the range
|
|
44
|
-
[-2^(
|
|
44
|
+
[-2^(LUT_VALUES_BITWIDTH-1), 2^(LUT_VALUES_BITWIDTH-1)-1].
|
|
45
45
|
Next, we take the scaled tensor_data and perform k-means clustering with 2^nbit clusters.
|
|
46
46
|
We return the rounded cluster centers, and threshold per channel. We use these to quantize the data.
|
|
47
47
|
Args:
|
|
@@ -59,9 +59,9 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
|
|
|
59
59
|
A dictionary containing the cluster assignments according to the k-means algorithm,
|
|
60
60
|
the thresholds per channel and the multiplier num bits.
|
|
61
61
|
"""
|
|
62
|
-
if n_bits >=
|
|
62
|
+
if n_bits >= LUT_VALUES_BITWIDTH:
|
|
63
63
|
Logger.critical(f'Look-Up-Table bit configuration has {n_bits} bits, but must be less than '
|
|
64
|
-
f'{
|
|
64
|
+
f'{LUT_VALUES_BITWIDTH}') # pragma: no cover
|
|
65
65
|
# TODO: need to set this externally
|
|
66
66
|
if len(np.unique(tensor_data.flatten())) < 2 ** n_bits:
|
|
67
67
|
n_clusters = len(np.unique(tensor_data.flatten()))
|
|
@@ -74,10 +74,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
|
|
|
74
74
|
channel_axis, n_iter, min_threshold,
|
|
75
75
|
qc.QuantizationErrorMethod.NOCLIPPING)[THRESHOLD]
|
|
76
76
|
|
|
77
|
-
tensor_for_kmeans = int_quantization_with_threshold(tensor_data, thresholds_per_channel,
|
|
77
|
+
tensor_for_kmeans = int_quantization_with_threshold(tensor_data, thresholds_per_channel, LUT_VALUES_BITWIDTH)
|
|
78
78
|
kmeans.fit(tensor_for_kmeans.reshape(-1, 1))
|
|
79
79
|
|
|
80
|
-
return {
|
|
80
|
+
return {LUT_VALUES: np.round(kmeans.cluster_centers_),
|
|
81
81
|
SCALE_PER_CHANNEL: thresholds_per_channel}
|
|
82
82
|
|
|
83
83
|
|
|
@@ -115,9 +115,9 @@ def lut_kmeans_histogram(bins: np.ndarray,
|
|
|
115
115
|
the threshold for pre-clustering quantization.
|
|
116
116
|
"""
|
|
117
117
|
|
|
118
|
-
if n_bits >=
|
|
118
|
+
if n_bits >= LUT_VALUES_BITWIDTH:
|
|
119
119
|
Logger.critical(f'Look-Up-Table bit configuration has {n_bits} bits. It must be less then '
|
|
120
|
-
f'{
|
|
120
|
+
f'{LUT_VALUES_BITWIDTH}') # pragma: no cover
|
|
121
121
|
|
|
122
122
|
bins_with_values = np.abs(bins)[1:][counts > 0]
|
|
123
123
|
if len(np.unique(bins_with_values.flatten())) < 2 ** n_bits:
|
|
@@ -130,8 +130,8 @@ def lut_kmeans_histogram(bins: np.ndarray,
|
|
|
130
130
|
threshold = max_power_of_two(tensor_max, min_threshold)
|
|
131
131
|
|
|
132
132
|
signed = np.any(bins[:-1][counts != 0] < 0) # Whether histogram contains negative values or not.
|
|
133
|
-
tensor_for_kmeans = int_quantization_with_threshold(data=bins, threshold=threshold, n_bits=
|
|
133
|
+
tensor_for_kmeans = int_quantization_with_threshold(data=bins, threshold=threshold, n_bits=LUT_VALUES_BITWIDTH, signed=signed)
|
|
134
134
|
kmeans.fit(tensor_for_kmeans.reshape(-1, 1), sample_weight=np.insert(counts, 0, 0))
|
|
135
135
|
|
|
136
|
-
return {
|
|
136
|
+
return {LUT_VALUES: np.float32(np.round(kmeans.cluster_centers_)),
|
|
137
137
|
THRESHOLD: threshold}
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
from sklearn.cluster import KMeans
|
|
17
17
|
import numpy as np
|
|
18
18
|
|
|
19
|
-
from model_compression_toolkit.constants import
|
|
19
|
+
from model_compression_toolkit.constants import LUT_VALUES, MIN_THRESHOLD, SCALE_PER_CHANNEL
|
|
20
20
|
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import kmeans_assign_clusters
|
|
21
21
|
|
|
22
22
|
|
|
@@ -42,12 +42,12 @@ def kmeans_quantizer(tensor_data: np.ndarray,
|
|
|
42
42
|
Quantized data.
|
|
43
43
|
"""
|
|
44
44
|
eps = 1e-8
|
|
45
|
-
|
|
45
|
+
lut_values = quantization_params[LUT_VALUES]
|
|
46
46
|
scales_per_channel = quantization_params[SCALE_PER_CHANNEL]
|
|
47
47
|
tensor = (tensor_data / (scales_per_channel + eps))
|
|
48
48
|
shape_before_kmeans = tensor.shape
|
|
49
|
-
cluster_assignments = kmeans_assign_clusters(
|
|
50
|
-
quant_tensor =
|
|
49
|
+
cluster_assignments = kmeans_assign_clusters(lut_values, tensor.reshape(-1, 1))
|
|
50
|
+
quant_tensor = lut_values[cluster_assignments].reshape(shape_before_kmeans)
|
|
51
51
|
if per_channel:
|
|
52
52
|
quant_tensor = (quant_tensor * scales_per_channel)
|
|
53
53
|
return quant_tensor
|
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
|
|
16
16
|
import numpy as np
|
|
17
17
|
|
|
18
|
-
from model_compression_toolkit.constants import
|
|
19
|
-
|
|
18
|
+
from model_compression_toolkit.constants import LUT_VALUES, SCALE_PER_CHANNEL, \
|
|
19
|
+
LUT_VALUES_BITWIDTH
|
|
20
20
|
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import kmeans_assign_clusters, \
|
|
21
21
|
get_quantized_tensor, int_quantization_with_threshold
|
|
22
22
|
|
|
@@ -30,8 +30,8 @@ def lut_kmeans_quantizer(tensor_data: np.ndarray,
|
|
|
30
30
|
"""
|
|
31
31
|
Quantize a tensor with given cluster centers and thresholds-per-channel vector.
|
|
32
32
|
1. We divide tensor_data with the scale vector per channel.
|
|
33
|
-
2. We scale the result to the range [-2^(
|
|
34
|
-
3. We assign cluster centers to every value, multiply by thresholds_per_channel and divide by 2^(
|
|
33
|
+
2. We scale the result to the range [-2^(LUT_VALUES_BITWIDTH-1), 2^(LUT_VALUES_BITWIDTH-1)-1].
|
|
34
|
+
3. We assign cluster centers to every value, multiply by thresholds_per_channel and divide by 2^(LUT_VALUES_BITWIDTH-1).
|
|
35
35
|
The result is the quantized tensor.
|
|
36
36
|
|
|
37
37
|
|
|
@@ -46,12 +46,12 @@ def lut_kmeans_quantizer(tensor_data: np.ndarray,
|
|
|
46
46
|
Returns:
|
|
47
47
|
Quantized data.
|
|
48
48
|
"""
|
|
49
|
-
|
|
49
|
+
lut_values = quantization_params[LUT_VALUES]
|
|
50
50
|
thresholds_per_channel = quantization_params[SCALE_PER_CHANNEL]
|
|
51
|
-
tensor = int_quantization_with_threshold(tensor_data, thresholds_per_channel,
|
|
51
|
+
tensor = int_quantization_with_threshold(tensor_data, thresholds_per_channel, LUT_VALUES_BITWIDTH)
|
|
52
52
|
shape_before_kmeans = tensor.shape
|
|
53
|
-
cluster_assignments = kmeans_assign_clusters(
|
|
54
|
-
quant_tensor = get_quantized_tensor(
|
|
53
|
+
cluster_assignments = kmeans_assign_clusters(lut_values, tensor.reshape(-1, 1))
|
|
54
|
+
quant_tensor = get_quantized_tensor(lut_values[cluster_assignments].reshape(shape_before_kmeans),
|
|
55
55
|
thresholds_per_channel,
|
|
56
|
-
|
|
56
|
+
LUT_VALUES_BITWIDTH)
|
|
57
57
|
return quant_tensor
|
|
@@ -151,12 +151,12 @@ def uniform_quantize_tensor(tensor_data: np.ndarray,
|
|
|
151
151
|
return q
|
|
152
152
|
|
|
153
153
|
|
|
154
|
-
def kmeans_assign_clusters(
|
|
154
|
+
def kmeans_assign_clusters(lut_values: np.ndarray,
|
|
155
155
|
query: np.ndarray) -> np.ndarray:
|
|
156
156
|
"""
|
|
157
157
|
Assign each data value in query with its closest cluster center point.
|
|
158
158
|
Args:
|
|
159
|
-
|
|
159
|
+
lut_values: the cluster centers to assign the query values.
|
|
160
160
|
query: values for which to assign cluster centers.
|
|
161
161
|
|
|
162
162
|
Returns: A tensor of indexes to the cluster centers that where assigned to each value in
|
|
@@ -164,9 +164,9 @@ def kmeans_assign_clusters(cluster_centers: np.ndarray,
|
|
|
164
164
|
|
|
165
165
|
"""
|
|
166
166
|
d0 = query.shape[0]
|
|
167
|
-
d1 =
|
|
167
|
+
d1 = lut_values.shape[0]
|
|
168
168
|
query_ = query.repeat(d1).reshape(d0, d1)
|
|
169
|
-
cluster_centers_ =
|
|
169
|
+
cluster_centers_ = lut_values.repeat(d0).reshape(d1, d0).transpose(1, 0)
|
|
170
170
|
return np.argmin(np.abs(query_ - cluster_centers_), axis=1)
|
|
171
171
|
|
|
172
172
|
|
|
@@ -5,8 +5,8 @@ import tensorflow as tf
|
|
|
5
5
|
from keras.layers import Layer
|
|
6
6
|
from tensorflow.python.util.object_identity import Reference as TFReference
|
|
7
7
|
|
|
8
|
-
from model_compression_toolkit.constants import SIGNED,
|
|
9
|
-
|
|
8
|
+
from model_compression_toolkit.constants import SIGNED, LUT_VALUES, EPS, \
|
|
9
|
+
LUT_VALUES_BITWIDTH, THRESHOLD
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
def activation_lut_kmean_quantizer(activation_n_bits: int,
|
|
@@ -29,14 +29,14 @@ def activation_lut_kmean_quantizer(activation_n_bits: int,
|
|
|
29
29
|
|
|
30
30
|
class LUTFakeQuant(Layer):
|
|
31
31
|
"""
|
|
32
|
-
A custom Keras layer for quantizing activation tensor with non-uniform quantization (using lookup table
|
|
32
|
+
A custom Keras layer for quantizing activation tensor with non-uniform quantization (using lookup table values).
|
|
33
33
|
"""
|
|
34
34
|
|
|
35
35
|
def __init__(self, quantization_params: Dict[str, np.ndarray], **kwargs):
|
|
36
36
|
super(LUTFakeQuant, self).__init__(**kwargs)
|
|
37
37
|
self.quantization_params = quantization_params
|
|
38
38
|
self.activation_is_signed = self.quantization_params.get(SIGNED)
|
|
39
|
-
self.
|
|
39
|
+
self.lut_values = self.quantization_params.get(LUT_VALUES)
|
|
40
40
|
self.threshold = self.quantization_params.get(THRESHOLD)
|
|
41
41
|
|
|
42
42
|
def build(self, input_shape: Tuple[int]):
|
|
@@ -59,7 +59,7 @@ class LUTFakeQuant(Layer):
|
|
|
59
59
|
Returns: KerasTensor after applying a non-uniform fake quantization.
|
|
60
60
|
|
|
61
61
|
"""
|
|
62
|
-
if self.activation_is_signed is None or self.
|
|
62
|
+
if self.activation_is_signed is None or self.lut_values is None or self.threshold is None:
|
|
63
63
|
return None # pragma: no cover
|
|
64
64
|
|
|
65
65
|
_quant_output = self.lut_kmeans_quantizer(input_data)
|
|
@@ -79,14 +79,14 @@ class LUTFakeQuant(Layer):
|
|
|
79
79
|
Returns: Quantized tensor.
|
|
80
80
|
"""
|
|
81
81
|
|
|
82
|
-
tensor = self.int_quantization_with_threshold(tensor_data,
|
|
82
|
+
tensor = self.int_quantization_with_threshold(tensor_data, LUT_VALUES_BITWIDTH)
|
|
83
83
|
tensor = tf.expand_dims(tensor, -1)
|
|
84
84
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
centers = tf.gather(self.
|
|
85
|
+
expanded_lut_values = self.lut_values.reshape([*[1 for _ in range(len(tensor.shape)-1)], -1])
|
|
86
|
+
lut_values_assignments = tf.argmin(tf.abs(tensor - expanded_lut_values), axis=-1)
|
|
87
|
+
centers = tf.gather(self.lut_values.flatten(), lut_values_assignments)
|
|
88
88
|
|
|
89
|
-
quant_tensor = (centers / (2 ** (
|
|
89
|
+
quant_tensor = (centers / (2 ** (LUT_VALUES_BITWIDTH - int(self.activation_is_signed)))) * self.threshold
|
|
90
90
|
|
|
91
91
|
return quant_tensor
|
|
92
92
|
|
|
@@ -3,7 +3,7 @@ from typing import Dict, Callable
|
|
|
3
3
|
import torch
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
|
-
from model_compression_toolkit.constants import SIGNED,
|
|
6
|
+
from model_compression_toolkit.constants import SIGNED, LUT_VALUES, THRESHOLD, LUT_VALUES_BITWIDTH, EPS
|
|
7
7
|
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
|
|
8
8
|
|
|
9
9
|
|
|
@@ -27,7 +27,7 @@ def activation_lut_kmean_quantizer(activation_n_bits: int,
|
|
|
27
27
|
|
|
28
28
|
class PytorchLUTFakeQuant(torch.nn.Module):
|
|
29
29
|
"""
|
|
30
|
-
A custom PyTorch layer for quantizing activation tensor with non-uniform quantization (using lookup table
|
|
30
|
+
A custom PyTorch layer for quantizing activation tensor with non-uniform quantization (using lookup table values).
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
33
|
def __init__(self,
|
|
@@ -43,7 +43,7 @@ class PytorchLUTFakeQuant(torch.nn.Module):
|
|
|
43
43
|
|
|
44
44
|
self.quantization_params = quantization_params
|
|
45
45
|
self.activation_is_signed = self.quantization_params.get(SIGNED)
|
|
46
|
-
self.
|
|
46
|
+
self.lut_values = to_torch_tensor(self.quantization_params.get(LUT_VALUES))
|
|
47
47
|
self.threshold = self.quantization_params.get(THRESHOLD)
|
|
48
48
|
|
|
49
49
|
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
|
@@ -56,7 +56,7 @@ class PytorchLUTFakeQuant(torch.nn.Module):
|
|
|
56
56
|
Returns:
|
|
57
57
|
Quantized torch Tensor.
|
|
58
58
|
"""
|
|
59
|
-
if self.activation_is_signed is None or self.
|
|
59
|
+
if self.activation_is_signed is None or self.lut_values is None or self.threshold is None:
|
|
60
60
|
return None # pragma: no cover
|
|
61
61
|
|
|
62
62
|
_quant_output = self.lut_kmeans_quantizer(x)
|
|
@@ -76,14 +76,14 @@ class PytorchLUTFakeQuant(torch.nn.Module):
|
|
|
76
76
|
Returns: Quantized tensor.
|
|
77
77
|
"""
|
|
78
78
|
|
|
79
|
-
tensor = self.int_quantization_with_threshold(tensor_data,
|
|
79
|
+
tensor = self.int_quantization_with_threshold(tensor_data, LUT_VALUES_BITWIDTH)
|
|
80
80
|
tensor = tensor.unsqueeze(-1)
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
centers = self.
|
|
82
|
+
expanded_lut_values = self.lut_values.reshape([*[1 for _ in range(len(tensor.shape) - 1)], -1])
|
|
83
|
+
lut_values_assignments = torch.argmin(torch.abs(tensor - expanded_lut_values), dim=-1)
|
|
84
|
+
centers = self.lut_values.flatten()[lut_values_assignments]
|
|
85
85
|
|
|
86
|
-
quant_tensor = (centers / (2 ** (
|
|
86
|
+
quant_tensor = (centers / (2 ** (LUT_VALUES_BITWIDTH - int(self.activation_is_signed)))) * self.threshold
|
|
87
87
|
|
|
88
88
|
return quant_tensor
|
|
89
89
|
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from typing import Dict, Any
|
|
16
16
|
|
|
17
17
|
from model_compression_toolkit.core.common import BaseNode
|
|
18
|
-
from model_compression_toolkit.constants import THRESHOLD, RANGE_MIN, RANGE_MAX, SIGNED,
|
|
18
|
+
from model_compression_toolkit.constants import THRESHOLD, RANGE_MIN, RANGE_MAX, SIGNED, LUT_VALUES, SCALE_PER_CHANNEL
|
|
19
19
|
from model_compression_toolkit.core.common.quantization.node_quantization_config import BaseNodeQuantizationConfig, \
|
|
20
20
|
NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig
|
|
21
21
|
|
|
@@ -66,7 +66,7 @@ def get_inferable_quantizer_kwargs(node_qc: BaseNodeQuantizationConfig,
|
|
|
66
66
|
elif quantization_method in [QuantizationMethod.LUT_SYM_QUANTIZER, QuantizationMethod.LUT_POT_QUANTIZER]:
|
|
67
67
|
return {qi_keras_consts.NUM_BITS: node_qc.weights_n_bits,
|
|
68
68
|
qi_keras_consts.PER_CHANNEL: node_qc.weights_per_channel_threshold,
|
|
69
|
-
qi_keras_consts.
|
|
69
|
+
qi_keras_consts.LUT_VALUES: list(node_qc.weights_quantization_params[LUT_VALUES].flatten()),
|
|
70
70
|
qi_keras_consts.THRESHOLD: list(node_qc.weights_quantization_params[SCALE_PER_CHANNEL].flatten()),
|
|
71
71
|
qi_keras_consts.CHANNEL_AXIS: node_qc.weights_channels_axis,
|
|
72
72
|
# TODO: how to pass multiplier nbits and eps for a specific node?
|
|
@@ -98,7 +98,7 @@ def get_inferable_quantizer_kwargs(node_qc: BaseNodeQuantizationConfig,
|
|
|
98
98
|
elif quantization_method in [QuantizationMethod.LUT_POT_QUANTIZER]:
|
|
99
99
|
return {qi_keras_consts.NUM_BITS: node_qc.activation_n_bits,
|
|
100
100
|
qi_keras_consts.SIGNED: node_qc.activation_quantization_params[SIGNED],
|
|
101
|
-
qi_keras_consts.
|
|
101
|
+
qi_keras_consts.LUT_VALUES: node_qc.activation_quantization_params[LUT_VALUES],
|
|
102
102
|
qi_keras_consts.THRESHOLD: [node_qc.activation_quantization_params[THRESHOLD]]
|
|
103
103
|
# TODO: how to pass multiplier nbits and eps for a specific node?
|
|
104
104
|
}
|
|
@@ -17,7 +17,7 @@ from typing import Dict, Any
|
|
|
17
17
|
|
|
18
18
|
from model_compression_toolkit.core.common import BaseNode
|
|
19
19
|
from model_compression_toolkit.constants import THRESHOLD, SIGNED, RANGE_MIN, RANGE_MAX, \
|
|
20
|
-
SCALE_PER_CHANNEL,
|
|
20
|
+
SCALE_PER_CHANNEL, LUT_VALUES
|
|
21
21
|
from model_compression_toolkit.core.common.quantization.node_quantization_config import BaseNodeQuantizationConfig, \
|
|
22
22
|
NodeWeightsQuantizationConfig, NodeActivationQuantizationConfig
|
|
23
23
|
from model_compression_toolkit.logger import Logger
|
|
@@ -64,11 +64,11 @@ def get_weights_inferable_quantizer_kwargs(node_qc: NodeWeightsQuantizationConfi
|
|
|
64
64
|
|
|
65
65
|
elif quantization_method in [QuantizationMethod.LUT_POT_QUANTIZER, QuantizationMethod.LUT_SYM_QUANTIZER]:
|
|
66
66
|
return {qi_inferable_quantizers_constants.NUM_BITS: node_qc.weights_n_bits,
|
|
67
|
-
qi_inferable_quantizers_constants.
|
|
67
|
+
qi_inferable_quantizers_constants.LUT_VALUES: node_qc.weights_quantization_params[LUT_VALUES].flatten(),
|
|
68
68
|
qi_inferable_quantizers_constants.THRESHOLD: node_qc.weights_quantization_params[SCALE_PER_CHANNEL].flatten(),
|
|
69
69
|
qi_inferable_quantizers_constants.PER_CHANNEL: node_qc.weights_per_channel_threshold,
|
|
70
70
|
qi_inferable_quantizers_constants.CHANNEL_AXIS: node_qc.weights_channels_axis}
|
|
71
|
-
# TODO: Add
|
|
71
|
+
# TODO: Add LUT_VALUES_BITWIDTH & EPS to node quantization config
|
|
72
72
|
|
|
73
73
|
else:
|
|
74
74
|
Logger.critical(f'Not supported quantization method for weights inferable quantizers.') # pragma: no cover
|
|
@@ -106,12 +106,12 @@ def get_activation_inferable_quantizer_kwargs(node_qc: NodeActivationQuantizatio
|
|
|
106
106
|
|
|
107
107
|
elif quantization_method in [QuantizationMethod.LUT_POT_QUANTIZER]:
|
|
108
108
|
return {qi_inferable_quantizers_constants.NUM_BITS: node_qc.activation_n_bits,
|
|
109
|
-
qi_inferable_quantizers_constants.
|
|
110
|
-
[node_qc.activation_quantization_params[
|
|
109
|
+
qi_inferable_quantizers_constants.LUT_VALUES: np.asarray(
|
|
110
|
+
[node_qc.activation_quantization_params[LUT_VALUES]]),
|
|
111
111
|
qi_inferable_quantizers_constants.THRESHOLD: np.asarray(
|
|
112
112
|
[node_qc.activation_quantization_params[THRESHOLD]]),
|
|
113
113
|
qi_inferable_quantizers_constants.SIGNED: node_qc.activation_quantization_params.get(SIGNED)}
|
|
114
|
-
# TODO: Add
|
|
114
|
+
# TODO: Add LUT_VALUES_BITWIDTH & EPS to node quantization config
|
|
115
115
|
else:
|
|
116
116
|
Logger.critical(f'Not supported quantization method for inferable quantizers.') # pragma: no cover
|
|
117
117
|
|
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py
CHANGED
|
@@ -63,7 +63,7 @@ class OpQuantizationConfig:
|
|
|
63
63
|
self.quantization_preserving = quantization_preserving
|
|
64
64
|
self.fixed_scale = fixed_scale
|
|
65
65
|
self.fixed_zero_point = fixed_zero_point
|
|
66
|
-
self.
|
|
66
|
+
self.eights_lut_values_bitwidth = weights_multiplier_nbits
|
|
67
67
|
|
|
68
68
|
def get_info(self):
|
|
69
69
|
"""
|
|
File without changes
|
{mct_nightly-1.9.0.20230813.post401.dist-info → mct_nightly-1.9.0.20230814.post352.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|