mct-nightly 2.1.0.20240608.434__py3-none-any.whl → 2.1.0.20240610.442__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/METADATA +1 -1
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/RECORD +26 -18
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/base_node.py +1 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +10 -6
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +15 -7
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +30 -14
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +8 -7
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +108 -87
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +15 -13
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +29 -14
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +40 -14
- model_compression_toolkit/core/keras/reader/node_builder.py +3 -3
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +25 -23
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +10 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +222 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py +131 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py +111 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +219 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py +131 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py +110 -0
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/top_level.txt +0 -0
{mct_nightly-2.1.0.20240608.434.dist-info → mct_nightly-2.1.0.20240610.442.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=
+model_compression_toolkit/__init__.py,sha256=8uKLxbPGI4bXEsOnz8snYp5aOCbWS0nIiBxD9ic580Y,1573
 model_compression_toolkit/constants.py,sha256=9pVleMwnhlM4QwIL2HcEq42I1uF4rlSw63RUjkxOF4w,3923
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -31,7 +31,7 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
 model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
 model_compression_toolkit/core/common/graph/base_graph.py,sha256=lmIw0srKiwCvz7KWqfwKTxyQHDy3s6rWMIXzFAa1UMo,38326
-model_compression_toolkit/core/common/graph/base_node.py,sha256=
+model_compression_toolkit/core/common/graph/base_node.py,sha256=X_0zqHrKYAsmnj9tAKjVYasbFcZD8OHpjdiMj9ugQs0,29436
 model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
 model_compression_toolkit/core/common/graph/functional_node.py,sha256=71_4TrCdqR_r0mtgxmAyqI05iP5YoQQGeSmDgynuzTw,3902
 model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
@@ -101,7 +101,7 @@ model_compression_toolkit/core/common/quantization/candidate_node_quantization_c
 model_compression_toolkit/core/common/quantization/core_config.py,sha256=KYdyfSmjSL4ye24nKlC_c4_AxYb14qoqaeMnZj4-8kE,2257
 model_compression_toolkit/core/common/quantization/debug_config.py,sha256=HtkMmneN-EmAzgZK4Vp4M8Sqm5QKdrvNyyZMpaVqYzY,1482
 model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=fwF4VILaX-u3ZaFd81xjbJuhg8Ef-JX_KfMXW0TPV-I,7136
-model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=
+model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=u0JkdRqBXG0RvvYyLyvYknEVtB2-gxpqUJnUw3loLmE,26851
 model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=du0VdsxfkOSYaP1EU9gHA5qbXpfQNZL0jXrjk1wBA0U,7106
 model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=eyosbVdnCwed7oMQ19tqnh0VoyGZ_UAuD_UnNoXyBpo,2210
 model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=MwIOBZ4BlZSTIOG75PDvlI3JmZ6t8YjPc1VP9Adei60,3847
@@ -110,15 +110,15 @@ model_compression_toolkit/core/common/quantization/quantize_node.py,sha256=cdzGN
 model_compression_toolkit/core/common/quantization/set_node_quantization_config.py,sha256=O4qFJw3nBYUD4cGbO8haGXZ2-piSqoRpDKDD74iXSxw,12417
 model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py,sha256=eCDGwsWYLU6z7qbEVb4TozMW_nd5VEP_iCJ6PcvyEPw,1486
 model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py,sha256=w367wmtJ7iWmM4_HlpX-YVUuqtYKrsiPP1oDaICIuK8,23308
-model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py,sha256=
+model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py,sha256=t0XSwjfOxcq2Sj2PGzccntz1GGv2eqVn9oR3OI0t9wo,8533
 model_compression_toolkit/core/common/quantization/quantization_params_generation/outlier_filter.py,sha256=9gnfJV89jpGwAx8ImJ5E9NjCv3lDtbyulP4OtgWb62M,1772
-model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py,sha256=
+model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py,sha256=HfnhQ4MxGpb95gOWXD1vnroTxxjFt9VFd4jIdo-rvAQ,10623
 model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py,sha256=noEdvGiyyW7acgQ2OFWLedCODibTGYJifC9qo8YIU5U,4558
-model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py,sha256=
-model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py,sha256=
-model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py,sha256=
-model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py,sha256=
-model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py,sha256=
+model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py,sha256=E_XFTpYNUZ3JgOk_2qbUbmJH6qGqBM3TDsY4WptYup0,6478
+model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py,sha256=o2XNY_0pUUyId02TUVQBtkux_i40NCcnzuobSeQLy3E,42863
+model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py,sha256=zSNda0jN8cP41m6g5TOv5WvATwIhV8z6AVM1Es6rq1s,4419
+model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py,sha256=4TP41wPYC0azIzFxUt-lNlKUPIIXQeE4H1SYHkON75k,11875
+model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py,sha256=E83BU4wZEOY-Q-HTo04ABftv22Y6fWEdNYkGA-MZLMU,10494
 model_compression_toolkit/core/common/quantization/quantizers/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/common/quantization/quantizers/lut_kmeans_quantizer.py,sha256=P0x_y18LypBxP2tV9OWizheYfILqvaMC8RwHo04sUpQ,2761
 model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py,sha256=CCFhi5LUIcHCCIzDyORvm0FDZLknrctdNwNlPphOQgI,14245
@@ -199,7 +199,7 @@ model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py,sha256=Up3-sbuA
 model_compression_toolkit/core/keras/reader/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/reader/common.py,sha256=eZWjBcvTDUX7fCWmy1OAH4lYLFTh59_UQ_nP_Gjp4yw,2594
 model_compression_toolkit/core/keras/reader/connectivity_handler.py,sha256=AgF6qXZOJMeXvc-pBnGY23BJz7wPBx2aTYxHiO8efec,11303
-model_compression_toolkit/core/keras/reader/node_builder.py,sha256=
+model_compression_toolkit/core/keras/reader/node_builder.py,sha256=SAPkgL8aqJjnB6eCucU2D4m50WACCzWC8wjCVtFnwp8,10424
 model_compression_toolkit/core/keras/reader/reader.py,sha256=wS9UQ2wJKnkZYe9JHwQp7ygDr6CRlzrxmIyLDv1Qz6U,8109
 model_compression_toolkit/core/keras/reader/nested_model/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/reader/nested_model/edges_merger.py,sha256=K6KAH9o8KSG6baLmhKoCrYK-i-wb6gRKiZmoijFqEYA,7906
@@ -222,7 +222,7 @@ model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py,s
 model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py,sha256=tLrlUyYhxVKVjkad1ZAtbRra0HedB3iVfIkZ_dYnQ-4,3419
 model_compression_toolkit/core/pytorch/back2framework/instance_builder.py,sha256=BBHBfTqeWm7L3iDyPBpk0jxvj-rBg1QWI23imkjfIl0,1467
 model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py,sha256=D7lU1r9Uq_7fdNuKk2BMF8ho5GrsY-8gyGN6yYoHaVg,15060
-model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py,sha256=
+model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py,sha256=Zw4gi-wjJNV8-qGv79YBWVAHmy27f7iW0c2JGNWAKD0,18199
 model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py,sha256=qZNNOlNTTV4ZKPG3q5GDXkIVTPUEr8dvxAS_YiMORmg,3456
 model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py,sha256=q2JDw10NKng50ee2i9faGzWZ-IydnR2aOMGSn9RoZmc,5773
@@ -431,7 +431,7 @@ model_compression_toolkit/target_platform_capabilities/target_platform/targetpla
 model_compression_toolkit/target_platform_capabilities/tpc_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py,sha256=-jCL-meZWFBF-Dp9wBYTX_14SKmyyUJE-BZ2IQDJIAk,3336
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
-model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py,sha256=
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py,sha256=mjPFr6Z-PLzqQta8mW7dK31mbbBZsJo4MdpJQmxlSt4,4640
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py,sha256=F5RG4MnuAwKcNXbfVbPFLQu30-lNax-7knqu20B6udQ,1522
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/__init__.py,sha256=1mMOREEMoNHu_KTMGDp4crN61opKWX6aFn1DrDLvqcc,717
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py,sha256=S-GwMI-JiuPpbtOdd6TSOEjiUFiIs6M2RAiJNJ3O950,10883
@@ -453,6 +453,14 @@ model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py,sha256=dmi2lCT0dw6RnWVw73tcnqgsVSgINSWaIWfgZhEli4Q,10691
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py,sha256=6PVKQKGpJpM2B1qvmf6fID_-MACaSQZkaL_9J_fj2SQ,6595
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py,sha256=dFQjzFlLDwoUqKNP1at1fS1N1WJadSSasRyzHl6vaB8,5733
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/__init__.py,sha256=gAeebYCKyIXH9-Qwze7FwvTihudzAHk_Qsg94fQbkjQ,717
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py,sha256=edMH4lM7Bq7FaPAFZLU5UMX-bWSWiaaAIXnQE7lZ7rI,11844
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_keras.py,sha256=T5YMv-RzgYlzBaagnMO7WnKgbZ7PrOvm29Nn4vUhCHI,6587
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tpc_pytorch.py,sha256=-q6Tnn7diPCCoATmLDzJwWwviQcbMMISqgpLu2n42JY,5726
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/__init__.py,sha256=C2kwyDE1-rtukkbNSoKRv9q8Nt2GOCaBbl0BdOr3goA,721
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py,sha256=HoGjDwoSx2Y4dQua5v1qzzlnSl_HfDMK6bGWuZhPOzQ,11577
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_keras.py,sha256=LvqUkvpJKXBb9QETcHsmp9OGDwl9KWr457deag8GVuM,6595
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tpc_pytorch.py,sha256=4Y2D14rE0SnWIkBTYsVqCryB-gkHU1ZlbdkWF864mPU,5733
 model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/target_platform_capabilities.py,sha256=7KVcuz0LfngRKOsfcvBysxGVb9fqgoAO6MVTl1CmB5c,2082
 model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py,sha256=UUvUCcTots_sehdRnDfgkaE8WPQ7dPbeuhDF4Qy2nzw,1510
@@ -483,8 +491,8 @@ model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha
 model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
 model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
 model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=MxylaVFPgN7zBiRBy6WV610EA4scLgRJFbMucKvvNDU,2896
-mct_nightly-2.1.0.
-mct_nightly-2.1.0.
-mct_nightly-2.1.0.
-mct_nightly-2.1.0.
-mct_nightly-2.1.0.
+mct_nightly-2.1.0.20240610.442.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.1.0.20240610.442.dist-info/METADATA,sha256=Juo23o8F4ndhmb8TksZ99xKWtks0DK59daxJqx_9RmI,19721
+mct_nightly-2.1.0.20240610.442.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+mct_nightly-2.1.0.20240610.442.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.1.0.20240610.442.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.1.0.
+__version__ = "2.1.0.20240610.000442"
model_compression_toolkit/core/common/graph/base_node.py
CHANGED
@@ -240,10 +240,7 @@ class BaseNode:
                                   if isinstance(pos, int)):
             if pos > len(input_tensors):
                 Logger.critical("The positional weight index cannot exceed the number of input tensors to the node.")  # pragma: no cover
-
-            # subject to quantization, the quantization wrapper inserts the positional weight into the node.
-            if not self.is_weights_quantization_enabled(pos):
-                input_tensors.insert(pos, weight)
+            input_tensors.insert(pos, weight)
 
         return input_tensors
 
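To make the new behavior concrete, here is a hedged, self-contained sketch of the positional-weight insertion after this change; the function name and the integer-keyed `weights` layout are assumptions inferred from the truncated context lines, not a verbatim copy of the MCT source. Every positional weight is now inserted back into the node's input list unconditionally, instead of only when its quantization is disabled.

```python
# Hedged sketch of the post-change positional-weight insertion.
from typing import Any, Dict, List


def insert_positional_weights(weights: Dict[Any, Any], input_tensors: List) -> List:
    # Positional weights are assumed to live under integer keys; named
    # weights under string keys are left alone.
    for pos, weight in sorted((k, w) for k, w in weights.items() if isinstance(k, int)):
        if pos > len(input_tensors):
            raise ValueError("The positional weight index cannot exceed the number "
                             "of input tensors to the node.")
        # After this diff, the weight is always re-inserted, whether or not
        # its quantization is enabled.
        input_tensors.insert(pos, weight)
    return input_tensors


print(insert_positional_weights({0: "w0", "kernel": "k"}, ["x"]))  # ['w0', 'x']
```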
model_compression_toolkit/core/common/quantization/node_quantization_config.py
CHANGED
@@ -326,13 +326,17 @@ class WeightsAttrQuantizationConfig:
 
         """
         assert self.enable_weights_quantization
+        assert not (self.weights_per_channel_threshold and self.weights_channels_axis is None), \
+            "Trying to calculate threshold per channel, channel axis in None."
         if self.weights_quantization_params_fn is not None:
-            self.set_weights_quantization_param(
- … (removed lines are truncated in the registry diff rendering)
+            self.set_weights_quantization_param(
+                self.weights_quantization_params_fn(tensor_data,
+                                                    p=self.l_p_value,
+                                                    n_bits=self.weights_n_bits,
+                                                    per_channel=self.weights_per_channel_threshold and self.weights_channels_axis is not None,
+                                                    channel_axis=self.weights_channels_axis[0],  # output channel axis
+                                                    min_threshold=min_threshold)[0]  # Take only first output, the q-params, as axis is already chosen.
+            )
         else:
             self.set_weights_quantization_param({})
 
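This hunk adapts `WeightsAttrQuantizationConfig` to a params function that now returns a pair of (q-params dict, channel axis). Below is a hedged toy of that calling convention; `toy_params_fn` is a hypothetical min-max stand-in, not an MCT function.

```python
# Hedged toy of the new two-output convention for params-selection functions.
from typing import Dict, Optional, Tuple

import numpy as np


def toy_params_fn(tensor_data: np.ndarray,
                  per_channel: bool,
                  channel_axis: Optional[int]) -> Tuple[Dict[str, np.ndarray], Optional[int]]:
    # Mirrors the guard added in this hunk.
    assert not (per_channel and channel_axis is None), \
        "Trying to calculate threshold per channel, channel axis is None."
    # Reduce over all axes except the channel axis when quantizing per-channel.
    axes = tuple(i for i in range(tensor_data.ndim) if i != channel_axis) if per_channel else None
    threshold = np.abs(tensor_data).max(axis=axes)
    return {'threshold': threshold}, channel_axis


w = np.random.randn(8, 3, 3, 16)
# The axis is already fixed (output channels at index 3), so only element [0],
# the q-params dict, is kept, exactly as the diff does.
qparams = toy_params_fn(w, per_channel=True, channel_axis=3)[0]
```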
model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py
CHANGED
@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import Dict
+from typing import Dict, Tuple
 import numpy as np
 from sklearn.cluster import KMeans
 
@@ -42,7 +42,8 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
                       is_symmetric: bool = False,
                       node=None,
                       hessian_info_service: HessianInfoService = None,
-                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES
+                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
+                      ) -> Tuple[Dict[str, np.ndarray], int]:
     """
     The quantizer first finds the closest max value per channel of tensor_data.
     Now, we divide tensor_data with the threshold vector per channel. In addition, we scale the result to the range
@@ -70,27 +71,34 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
     if n_bits >= LUT_VALUES_BITWIDTH:
         Logger.critical(f'Look-Up-Table (LUT) bit configuration exceeds maximum: {n_bits} bits provided, must be less than {LUT_VALUES_BITWIDTH} bits.')  # pragma: no cover
     # TODO: need to set this externally
+    n_data_points = len(np.unique(tensor_data.flatten()))
     if len(np.unique(tensor_data.flatten())) < 2 ** n_bits:
-        n_clusters =
+        n_clusters = n_data_points
     else:
         n_clusters = 2 ** n_bits
     kmeans = KMeans(n_clusters=n_clusters, n_init=10)
 
     threshold_selection_tensor = symmetric_selection_tensor if is_symmetric else power_of_two_selection_tensor
- …
+
+    _params, channel_axis = threshold_selection_tensor(tensor_data, p, n_bits, per_channel,
+                                                       channel_axis, n_iter, min_threshold,
+                                                       qc.QuantizationErrorMethod.NOCLIPPING)
+    thresholds_per_channel = _params[THRESHOLD]
 
     tensor_for_kmeans = int_quantization_with_threshold(tensor_data, thresholds_per_channel, LUT_VALUES_BITWIDTH)
     kmeans.fit(tensor_for_kmeans.reshape(-1, 1))
 
     # Add 0 to the LUT
     cc = np.round(kmeans.cluster_centers_)
+    if n_data_points < 2 ** n_bits and np.all(cc != 0):
+        # In case there are fewer data points than potential clusters, we can add the cluster 0.0
+        # to the original clusters array to improve quantization (i.e. no need to zero one of the clusters).
+        cc = np.concatenate([np.zeros([1, 1], dtype=cc.dtype), cc])
     closest2zero_idx = (np.abs(cc - 0)).argmin()
     cc[closest2zero_idx] = 0.0
 
     return {LUT_VALUES: cc,
-            SCALE_PER_CHANNEL: thresholds_per_channel}
+            SCALE_PER_CHANNEL: thresholds_per_channel}, channel_axis
 
 
 def lut_kmeans_histogram(bins: np.ndarray,
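For readers following the LUT change, here is a hedged, self-contained sketch of the overall `lut_kmeans_tensor` flow (per-tensor only): pick a power-of-two threshold, scale the tensor into an integer grid, cluster the values with k-means, and make sure zero is representable. The helpers are simplified stand-ins for `power_of_two_selection_tensor` and `int_quantization_with_threshold`, and the `LUT_VALUES_BITWIDTH` value is an assumption.

```python
# Hedged sketch of LUT (look-up-table) k-means quantization.
import numpy as np
from sklearn.cluster import KMeans

LUT_VALUES_BITWIDTH = 8  # assumed value of the MCT constant


def lut_kmeans_sketch(tensor_data: np.ndarray, n_bits: int = 4):
    assert n_bits < LUT_VALUES_BITWIDTH
    # Fewer unique data points than 2**n_bits -> fewer clusters (as in the diff).
    n_data_points = len(np.unique(tensor_data.flatten()))
    n_clusters = min(n_data_points, 2 ** n_bits)

    # Per-tensor power-of-two threshold (stand-in for power_of_two_selection_tensor).
    threshold = 2.0 ** np.ceil(np.log2(np.abs(tensor_data).max()))

    # Scale into the LUT integer range and cluster the scaled values.
    tensor_for_kmeans = np.round(tensor_data / threshold * (2 ** (LUT_VALUES_BITWIDTH - 1)))
    kmeans = KMeans(n_clusters=n_clusters, n_init=10).fit(tensor_for_kmeans.reshape(-1, 1))

    # Ensure 0 is representable: add a zero cluster if all centers are
    # non-zero and there is room, then snap the closest center to zero.
    cc = np.round(kmeans.cluster_centers_)
    if n_data_points < 2 ** n_bits and np.all(cc != 0):
        cc = np.concatenate([np.zeros([1, 1], dtype=cc.dtype), cc])
    cc[np.abs(cc).argmin()] = 0.0
    return {'lut_values': cc, 'scale': threshold}


q = lut_kmeans_sketch(np.random.randn(32, 16).astype(np.float32))
```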
model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 import numpy as np
+from typing import Union, Tuple, Dict
 
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
 from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
@@ -23,20 +24,22 @@ from model_compression_toolkit.core.common.quantization.quantizers.quantizers_he
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.error_functions import \
     get_threshold_selection_tensor_error_function, get_threshold_selection_histogram_error_function
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
+from model_compression_toolkit.core.common.similarity_analyzer import compute_mse
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import quantize_tensor
 
 
 def power_of_two_selection_tensor(tensor_data: np.ndarray,
                                   p: int,
                                   n_bits: int,
                                   per_channel: bool = False,
-                                  channel_axis: int = 1,
+                                  channel_axis: Union[int, None] = 1,
                                   n_iter: int = 10,
                                   min_threshold: float = MIN_THRESHOLD,
                                   quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
                                   node=None,
                                   hessian_info_service: HessianInfoService = None,
                                   num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
-                                  ) ->
+                                  ) -> Tuple[Dict[str, np.ndarray], int]:
     """
     Compute the power of two threshold based on the provided QuantizationErrorMethod to quantize the tensor.
     Different search is applied, depends on the value of the selected QuantizationErrorMethod.
@@ -46,7 +49,7 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
         p: p-norm to use for the Lp-norm distance.
         n_bits: Number of bits to quantize the tensor.
         per_channel: Whether the quantization should be per-channel or not.
-        channel_axis: Output channel index.
+        channel_axis: Output channel index. if None, search for best axis.
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the parameters' selection accordingly.
@@ -56,11 +59,24 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
 
     Returns:
         Power of two threshold to quantize the tensor in a power of 2 manner.
+        Selected quantization channel axis.
     """
 
     if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
- …
+        if channel_axis is None and per_channel:
+            total_error_list = []
+            th_list = []
+            for _axis in range(len(tensor_data.shape)):
+                tensor_max = get_tensor_max(tensor_data, per_channel, _axis, n_bits)
+                threshold = max_power_of_two(tensor_max, min_threshold)
+                q_tensor_data = quantize_tensor(tensor_data, threshold, n_bits, True)
+                total_error_list.append(compute_mse(tensor_data, q_tensor_data, norm=True))
+                th_list.append(threshold)
+            channel_axis = np.argmin(total_error_list)
+            threshold = th_list[channel_axis]
+        else:
+            tensor_max = get_tensor_max(tensor_data, per_channel, channel_axis, n_bits)
+            threshold = max_power_of_two(tensor_max, min_threshold)
     else:
         signed = True  # weights are always signed
         axis = -1 if per_channel else None
@@ -69,15 +85,15 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
                                                                  n_bits=n_bits, signed=signed, node=node,
                                                                  hessian_info_service=hessian_info_service,
                                                                  num_hessian_samples=num_hessian_samples)
-        threshold = qparams_selection_tensor_search(error_function,
- …
-    return {THRESHOLD: threshold}
+        threshold, channel_axis = qparams_selection_tensor_search(error_function,
+                                                                  tensor_data,
+                                                                  n_bits,
+                                                                  per_channel=per_channel,
+                                                                  channel_axis=channel_axis,
+                                                                  n_iter=n_iter,
+                                                                  min_threshold=min_threshold,
+                                                                  signed=signed)
+    return {THRESHOLD: threshold}, channel_axis
 
 
 def power_of_two_selection_histogram(bins: np.ndarray,
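A hedged sketch of the no-clipping branch added above: when `channel_axis` is None and quantization is per-channel, every axis is tried as the channel axis and the power-of-two threshold with the lowest normalized MSE wins. The helper bodies below are simplified assumptions, not the MCT implementations of `get_tensor_max`, `max_power_of_two`, `quantize_tensor`, or `compute_mse`.

```python
# Hedged sketch of power-of-two threshold selection with axis search.
import numpy as np

MIN_THRESHOLD = 2 ** -16  # assumed value of the MCT constant


def max_power_of_two(x, min_threshold=MIN_THRESHOLD):
    # Smallest power of two that covers the (clamped) max value.
    return np.power(2.0, np.ceil(np.log2(np.maximum(x, min_threshold))))


def quantize_tensor(x, threshold, n_bits, signed=True):
    # Symmetric power-of-two quantization to n_bits.
    scale = threshold / (2 ** (n_bits - int(signed)))
    levels = np.clip(np.round(x / scale), -2 ** (n_bits - 1), 2 ** (n_bits - 1) - 1)
    return levels * scale


def pot_selection_no_clipping(tensor_data: np.ndarray, n_bits: int = 8):
    total_error_list, th_list = [], []
    for _axis in range(tensor_data.ndim):
        # Per-channel max along the candidate axis, broadcastable back.
        reduce_axes = tuple(i for i in range(tensor_data.ndim) if i != _axis)
        tensor_max = np.abs(tensor_data).max(axis=reduce_axes, keepdims=True)
        threshold = max_power_of_two(tensor_max)
        q = quantize_tensor(tensor_data, threshold, n_bits)
        # Normalized MSE as the per-axis score.
        total_error_list.append(np.mean((tensor_data - q) ** 2) / np.mean(tensor_data ** 2))
        th_list.append(threshold)
    channel_axis = int(np.argmin(total_error_list))
    return {'threshold': th_list[channel_axis]}, channel_axis


params, axis = pot_selection_no_clipping(np.random.randn(16, 3, 3, 8))
```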
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py
CHANGED
@@ -84,13 +84,14 @@ def calculate_quantization_params(graph: Graph,
                 mod_attr_cfg = copy.deepcopy(attr_cfg)
                 mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
 
-                weights_params = get_weights_qparams(n.get_weights_by_keys(attr),
- …
+                weights_params, output_channels_axis = get_weights_qparams(n.get_weights_by_keys(attr),
+                                                                           candidate_qc.weights_quantization_cfg,
+                                                                           mod_attr_cfg,
+                                                                           output_channels_axis,
+                                                                           node=n,
+                                                                           hessian_info_service=hessian_info_service,
+                                                                           num_hessian_samples=num_hessian_samples)
+                attr_cfg.weights_channels_axis = (output_channels_axis, attr_cfg.weights_channels_axis[1])
                 attr_cfg.set_weights_quantization_param(weights_params)
 
             if n.is_activation_quantization_enabled():
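A hedged toy of the bookkeeping this hunk adds: `get_weights_qparams` now also returns the output channel axis it selected, and the config's `weights_channels_axis` tuple of (output axis, input axis) is refreshed with it. `WeightsCfg` and `toy_get_weights_qparams` are hypothetical stand-ins, not MCT classes.

```python
# Hedged toy of feeding the selected axis back into the weights config.
from dataclasses import dataclass
from typing import Optional, Tuple

import numpy as np


@dataclass
class WeightsCfg:
    weights_channels_axis: Tuple[Optional[int], Optional[int]]  # (output, input)
    weights_quantization_params: dict = None

    def set_weights_quantization_param(self, params: dict):
        self.weights_quantization_params = params


def toy_get_weights_qparams(w: np.ndarray, requested_axis: Optional[int]):
    # Pretend the search picked the last axis when none was requested.
    axis = w.ndim - 1 if requested_axis is None else requested_axis
    reduce_axes = tuple(i for i in range(w.ndim) if i != axis)
    return {'threshold': np.abs(w).max(axis=reduce_axes)}, axis


cfg = WeightsCfg(weights_channels_axis=(None, 2))
weights_params, output_channels_axis = toy_get_weights_qparams(
    np.random.randn(3, 3, 4, 8), cfg.weights_channels_axis[0])
cfg.weights_channels_axis = (output_channels_axis, cfg.weights_channels_axis[1])
cfg.set_weights_quantization_param(weights_params)
```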
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py
CHANGED
@@ -27,7 +27,7 @@ from model_compression_toolkit.constants import MIN_THRESHOLD, DEFAULT_TOL, DEFA
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import quantize_tensor, \
     reshape_tensor_for_per_channel_search, uniform_quantize_tensor, get_output_shape
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import max_power_of_two, \
-    get_tensor_max
+    get_tensor_max, get_tensor_min
 
 
 def qparams_selection_tensor_search(error_function: Callable,
@@ -56,41 +56,49 @@ def qparams_selection_tensor_search(error_function: Callable,
     signed: a flag whether the tensor is signed.
 
     Returns:
-        Optimal constrained threshold to quantize the tensor
+        Optimal constrained threshold to quantize the tensor, and best channel axis if input channel_axis was None,
+        else return the input channel axis.
 
     """
 
- …
+    search_axes = range(len(tensor_data.shape)) if channel_axis is None and per_channel else [channel_axis]
+    total_error_list = []
+    th_list = []
+    for _axis in search_axes:
+        output_shape = get_output_shape(tensor_data.shape, _axis)
 
- …
-    tensor_data_r = reshape_tensor_for_per_channel_search(tensor_data, channel_axis)
+        # First threshold to check is the constrained threshold based on the tensor's maximal value.
+        tensor_max = get_tensor_max(tensor_data, per_channel, _axis, n_bits)
+        threshold = 2 * max_power_of_two(tensor_max, min_threshold)
 
- …
-    # is used for quantizing the tensor and computing the error. The error is appended to an error list, which
-    # eventually used to select the threshold with the minimal error.
-    for i in range(n_iter):
+        # Rearrange the tensor such that each sub-tensor is flattened, and we iterate over each
+        # one of them when searching for the threshold.
         if per_channel:
- …
+            tensor_data_r = reshape_tensor_for_per_channel_search(tensor_data, _axis)
+
+        error_list = []  # init an empty error list
+        # On each iteration a new constrained threshold which equal to half of the previous tested threshold
+        # is used for quantizing the tensor and computing the error. The error is appended to an error list, which
+        # eventually used to select the threshold with the minimal error.
+        for i in range(n_iter):
+            if per_channel:
+                threshold_hat = (threshold / (2 ** i)).reshape([-1, 1])
+                qt = quantize_tensor(tensor_data_r, threshold_hat, n_bits, signed)
+                per_channel_error = _error_function_wrapper(error_function, tensor_data_r, qt, threshold_hat)
+                error_list.append(per_channel_error)
+            else:  # quantize per-tensor
+                qt = quantize_tensor(tensor_data, threshold / (2 ** i), n_bits, signed)
+                error = error_function(qt, tensor_data, threshold=threshold / (2 ** i))
+                error_list.append(error)
+
+        # Take the index of the minimal error, and use it compute the threshold which yielded it.
+        err_mat = np.stack(error_list, axis=-1)
+        i = np.argmin(err_mat, axis=-1)
+        th_list.append(np.maximum(np.reshape(threshold.flatten() / np.power(2, i), output_shape), min_threshold))
+        total_error_list.append(err_mat.min(axis=-1).mean())
+
+    best_axis_index = np.argmin(total_error_list)
+    return th_list[best_axis_index], search_axes[best_axis_index]
 
 
 def qparams_selection_histogram_search(error_function: Callable,
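A hedged, runnable sketch of the search that `qparams_selection_tensor_search` performs after this rewrite: start from twice the max power-of-two threshold, halve it `n_iter` times while tracking quantization error, and, when `channel_axis` is None, repeat the search per candidate axis and keep the axis with the lowest error. The error here is a plain per-tensor MSE, a simplification of the per-channel error handling in the real code.

```python
# Hedged sketch of the iterative threshold-halving search with axis selection.
import numpy as np


def quantize(x, threshold, n_bits=8):
    # Symmetric signed quantization against a given threshold.
    scale = threshold / (2 ** (n_bits - 1))
    return np.clip(np.round(x / scale), -2 ** (n_bits - 1), 2 ** (n_bits - 1) - 1) * scale


def threshold_search(tensor_data, n_bits=8, n_iter=10, channel_axis=None, min_threshold=2 ** -16):
    search_axes = range(tensor_data.ndim) if channel_axis is None else [channel_axis]
    total_error_list, th_list = [], []
    for _axis in search_axes:
        reduce_axes = tuple(i for i in range(tensor_data.ndim) if i != _axis)
        tensor_max = np.abs(tensor_data).max(axis=reduce_axes, keepdims=True)
        # First candidate: twice the constrained max-power-of-two threshold.
        threshold = 2 * np.power(2.0, np.ceil(np.log2(np.maximum(tensor_max, min_threshold))))

        # Halve the threshold n_iter times; record the error of each candidate.
        error_list = [np.mean((tensor_data - quantize(tensor_data, threshold / 2 ** i, n_bits)) ** 2)
                      for i in range(n_iter)]
        i = int(np.argmin(error_list))
        th_list.append(np.maximum(threshold / 2 ** i, min_threshold))
        total_error_list.append(error_list[i])

    best = int(np.argmin(total_error_list))
    return th_list[best], list(search_axes)[best]


th, axis = threshold_search(np.random.randn(16, 3, 3, 8))
```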
@@ -390,13 +398,12 @@ def search_dynamic_range(base_range: np.ndarray, x: np.ndarray, scalers: np.ndar
 
 def qparams_symmetric_selection_tensor_search(error_function: Callable,
                                               tensor_data: np.ndarray,
-                                              tensor_max: np.ndarray,
                                               n_bits: int,
                                               per_channel: bool = False,
                                               channel_axis: int = 1,
                                               n_iter: int = SYMMETRIC_TENSOR_PER_CHANNEL_N_ITER,
                                               min_threshold=MIN_THRESHOLD,
-                                              signed: bool = True) ->
+                                              signed: bool = True) -> Tuple[np.ndarray, int]:
     """
     Search for optimal threshold (per-channel or per-tensor) for symmetric quantization of a tensor,
     using the iterative optimizer method.
@@ -404,7 +411,6 @@ def qparams_symmetric_selection_tensor_search(error_function: Callable,
 
     Args:
         error_function: Function to compute the error between the original and quantized tensors.
         tensor_data: Numpy array with tensor's content.
-        tensor_max: The max value of the tensor.
         n_bits: Number of bits to quantize the tensor.
         per_channel: Whether the tensor should be quantized per-channel or per-tensor.
         channel_axis: Index of output channels dimension.
@@ -417,46 +423,55 @@ def qparams_symmetric_selection_tensor_search(error_function: Callable,
 
     """
 
- …
+    search_axes = range(len(tensor_data.shape)) if channel_axis is None and per_channel else [channel_axis]
+    total_error_list = []
+    th_list = []
+    for _axis in search_axes:
+        tensor_max = get_tensor_max(tensor_data, per_channel, _axis, n_bits)
+        output_shape = get_output_shape(tensor_data.shape, _axis)
 
- …
+        if per_channel:
+            # Rearrange the tensor such that each sub-tensor is flattened, and we iterate
+            # over each one of them when searching for the threshold.
+            tensor_data_r = reshape_tensor_for_per_channel_search(tensor_data, _axis)
+            max_tensor = np.maximum(min_threshold, tensor_max)
+            res = qparams_symmetric_iterative_minimization(x0=max_tensor,
+                                                           x=tensor_data_r,
+                                                           loss_fn=error_function,  # gets float_tensor, fxp_tensor, threshold
+                                                           n_bits=n_bits,
+                                                           signed=signed,
+                                                           n_intervals=SYMMETRIC_TENSOR_PER_CHANNEL_N_INTERVALS,
+                                                           n_iter=SYMMETRIC_TENSOR_PER_CHANNEL_N_ITER,
+                                                           dec_freq=SYMMETRIC_TENSOR_PER_CHANNEL_DEC_FREQ,
+                                                           per_channel=True)
+            th = np.reshape(np.maximum(min_threshold, res['param']), output_shape)
+        else:
+            # quantize per-tensor
+            res = qparams_symmetric_iterative_minimization(x0=get_init_threshold(min_threshold, tensor_max),
+                                                           x=tensor_data,
+                                                           loss_fn=error_function,
+                                                           n_bits=n_bits,
+                                                           signed=signed,
+                                                           n_intervals=SYMMETRIC_TENSOR_N_INTERVALS,
+                                                           n_iter=SYMMETRIC_TENSOR_N_ITER,
+                                                           dec_freq=SYMMETRIC_TENSOR_DEC_FREQ,
+                                                           per_channel=False)
+            th = max(min_threshold, res['param'])
+
+        total_error_list.append(res['loss'].mean())
+        th_list.append(th)
+
+    best_axis_index = np.argmin(total_error_list)
+    return th_list[best_axis_index], search_axes[best_axis_index]
 
 
 def qparams_uniform_selection_tensor_search(error_function: Callable,
                                             tensor_data: np.ndarray,
-                                            tensor_min: np.ndarray,
-                                            tensor_max: np.ndarray,
                                             n_bits: int,
                                             per_channel: bool = False,
                                             channel_axis: int = 1,
-                                            n_iter: int = UNIFORM_TENSOR_PER_CHANNEL_N_ITER
+                                            n_iter: int = UNIFORM_TENSOR_PER_CHANNEL_N_ITER,
+                                            ) -> Tuple[Tuple[np.ndarray, np.ndarray], int]:
     """
     Search for optimal quantization range (per-channel or per-tensor) for uniform quantization of a tensor,
     using the iterative optimizer method and built-in scale factors
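A hedged sketch of the symmetric selection pattern introduced above, with a coarse grid over threshold candidates standing in for `qparams_symmetric_iterative_minimization`; only the axis-search scaffolding mirrors the diff.

```python
# Hedged sketch of symmetric threshold selection with per-axis search.
import numpy as np


def symmetric_quantize(x, threshold, n_bits=8):
    scale = threshold / (2 ** (n_bits - 1) - 1)
    return np.clip(np.round(x / scale), -(2 ** (n_bits - 1) - 1), 2 ** (n_bits - 1) - 1) * scale


def symmetric_threshold_search(tensor_data, n_bits=8, channel_axis=None, min_threshold=2 ** -16):
    search_axes = range(tensor_data.ndim) if channel_axis is None else [channel_axis]
    total_error_list, th_list = [], []
    for _axis in search_axes:
        reduce_axes = tuple(i for i in range(tensor_data.ndim) if i != _axis)
        tensor_max = np.maximum(np.abs(tensor_data).max(axis=reduce_axes, keepdims=True), min_threshold)
        # Try a few scale factors of the max as threshold candidates
        # (a crude stand-in for the iterative minimizer).
        losses, candidates = [], []
        for factor in np.linspace(0.5, 1.2, 8):
            th = factor * tensor_max
            losses.append(np.mean((tensor_data - symmetric_quantize(tensor_data, th, n_bits)) ** 2))
            candidates.append(th)
        best = int(np.argmin(losses))
        th_list.append(candidates[best])
        total_error_list.append(losses[best])
    best_axis_index = int(np.argmin(total_error_list))
    return th_list[best_axis_index], list(search_axes)[best_axis_index]


th, axis = symmetric_threshold_search(np.random.randn(16, 3, 3, 8))
```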
@@ -465,8 +480,6 @@ def qparams_uniform_selection_tensor_search(error_function: Callable,
     Args:
         error_function: Function to compute the error between the original and quantized tensors.
         tensor_data: Numpy array with tensor's content.
-        tensor_min: The min value of the tensor.
-        tensor_max: The max value of the tensor.
         n_bits: Number of bits to quantize the tensor.
         per_channel: Whether the tensor should be quantized per-channel or per-tensor.
         channel_axis: Index of output channels dimension.
@@ -477,17 +490,22 @@ def qparams_uniform_selection_tensor_search(error_function: Callable,
 
     """
 
- …
+    search_axes = range(len(tensor_data.shape)) if channel_axis is None and per_channel else [channel_axis]
+    total_error_list = []
+    th_list = []
+    for _axis in search_axes:
+        tensor_min = get_tensor_min(tensor_data, per_channel, _axis)
+        tensor_max = get_tensor_max(tensor_data, per_channel, _axis, n_bits, is_uniform_quantization=True)
+        output_shape = get_output_shape(tensor_data.shape, _axis)
 
- …
+        alpha = np.linspace(BOTTOM_FACTOR, UPPER_FACTOR, UNIFORM_TENSOR_N_SAMPLES)
+        beta = np.linspace(BOTTOM_FACTOR, UPPER_FACTOR, UNIFORM_TENSOR_N_SAMPLES)
+        scalers = np.asarray(list(itertools.product(alpha, beta)))
 
- …
-        if per_channel:
+        # Rearrange the tensor such that each sub-tensor is flattened, and we iterate over
+        # each one of them when searching for the threshold.
         if per_channel:
-            tensor_data_r = reshape_tensor_for_per_channel_search(tensor_data,
+            tensor_data_r = reshape_tensor_for_per_channel_search(tensor_data, _axis)
             tensor_min_max = np.column_stack([tensor_min.flatten(), tensor_max.flatten()])
             res = iterative_uniform_dynamic_range_search(x0=tensor_min_max,
                                                          x=tensor_data_r,
@@ -496,18 +514,21 @@ def qparams_uniform_selection_tensor_search(error_function: Callable,
                                                          n_bits=n_bits,
                                                          n_iter=UNIFORM_TENSOR_PER_CHANNEL_N_ITER,
                                                          per_channel=True)
- …
+            th_list.append((np.reshape(res['param'][:, 0], output_shape), np.reshape(res['param'][:, 1], output_shape)))
+        else:
+            # quantize per-tensor
+            res = iterative_uniform_dynamic_range_search(x0=np.array([tensor_min, tensor_max]),
+                                                         x=tensor_data,
+                                                         scalers=scalers,
+                                                         loss_fn=error_function,
+                                                         n_bits=n_bits,
+                                                         n_iter=UNIFORM_TENSOR_N_ITER,
+                                                         per_channel=False)
+            th_list.append(tuple(np.split(res['param'], 2)))
+        total_error_list.append(res['loss'].mean())
+
+    best_axis_index = np.argmin(total_error_list)
+    return th_list[best_axis_index], search_axes[best_axis_index]
 
 
def qparams_symmetric_selection_histogram_search(error_function: Callable,
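Finally, a hedged sketch of the uniform (asymmetric) range search in the last hunks: the initial [min, max] range is scaled by a grid of (alpha, beta) factors and the pair with the lowest uniform-quantization MSE is kept. The `BOTTOM_FACTOR`/`UPPER_FACTOR`/sample-count values below are assumptions, not the MCT constants.

```python
# Hedged sketch of the uniform dynamic-range grid search (per-tensor only).
import itertools

import numpy as np

BOTTOM_FACTOR, UPPER_FACTOR, N_SAMPLES = 0.7, 1.2, 8  # assumed constants


def uniform_quantize(x, range_min, range_max, n_bits=8):
    delta = (range_max - range_min) / (2 ** n_bits - 1)
    return np.clip(np.round((x - range_min) / delta), 0, 2 ** n_bits - 1) * delta + range_min


def uniform_range_search(tensor_data, n_bits=8):
    tensor_min, tensor_max = tensor_data.min(), tensor_data.max()
    alpha = np.linspace(BOTTOM_FACTOR, UPPER_FACTOR, N_SAMPLES)
    beta = np.linspace(BOTTOM_FACTOR, UPPER_FACTOR, N_SAMPLES)
    best_range, best_loss = (tensor_min, tensor_max), np.inf
    # Scale min by alpha and max by beta; keep the lowest-MSE pair.
    for a, b in itertools.product(alpha, beta):
        rng = (a * tensor_min, b * tensor_max)
        loss = np.mean((tensor_data - uniform_quantize(tensor_data, *rng, n_bits)) ** 2)
        if loss < best_loss:
            best_range, best_loss = rng, loss
    return best_range, best_loss


(rmin, rmax), loss = uniform_range_search(np.random.randn(1000))
```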
|