PyPI - mct-nightly - Versions diffs - 2.0.0.20240417.406__py3-none-any.whl → 2.0.0.20240418.439__py3-none-any.whl - Mend

mct-nightly 2.0.0.20240417.406__py3-none-any.whl → 2.0.0.20240418.439__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240418.439.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.0.0.20240417.406
+Version: 2.0.0.20240418.439
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240418.439.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
-model_compression_toolkit/__init__.py,sha256=HSq5ybA5NctJln9ucs7HnIcj00pgOGdhjVxEY-2w5dY,1573
-model_compression_toolkit/constants.py,sha256=f9at1H_-vb5nvdHRmAHUco4ja4_QermK6yu0N9qbRGE,3723
+model_compression_toolkit/__init__.py,sha256=aO8E_DhwQy12oAxKxqXFskaEwaq_icpSqsisZn6UyZM,1573
+model_compression_toolkit/constants.py,sha256=yIJyJ-e1WrDeKD9kG15qkqfYnoj7J1J2CxnJDt008ik,3756
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
 model_compression_toolkit/metadata.py,sha256=IyoON37lBv3TI0rZGCP4K5t3oYI4TOmYy-LRXOwHGpE,1136
 model_compression_toolkit/core/__init__.py,sha256=TrRgkWpT1AN2Faw1M_1HXyJkJnbxfn9p-RigDZl7pg0,1982
 model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
-model_compression_toolkit/core/graph_prep_runner.py,sha256=Ftqm59hT5TGWmSNkY9bFZkVfCacpGyZfCe-6yZR5WY0,10100
-model_compression_toolkit/core/quantization_prep_runner.py,sha256=hFhDkS8GwzXZ7Ho_9qbbb8DAAWs3OONOfMSD5OU_b0o,6153
-model_compression_toolkit/core/runner.py,sha256=NKSC6ujfQPy6dKtJVwxyK2zNDd64eyR5csYy9lBrCPA,11836
+model_compression_toolkit/core/graph_prep_runner.py,sha256=kM70wmNG3yMFiGQc0uO0wn9j4ZbSWxUEykpxDK55doc,10567
+model_compression_toolkit/core/quantization_prep_runner.py,sha256=0ga95vh_ZXO79r8FB26L5GIZKHkG98wq1hMsNH1bIeU,6453
+model_compression_toolkit/core/runner.py,sha256=E_gXj95Az3C3swsv7v1zeKZx25keWjnD30uhI7ONZkY,12028
 model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
 model_compression_toolkit/core/common/base_substitutions.py,sha256=xDFSmVVs_iFSZfajytI0cuQaNRNcwHX3uqOoHgVUvxQ,1666
 model_compression_toolkit/core/common/framework_implementation.py,sha256=pOT9ZmRFL9FY92uUtigrO3sbWGiyVDhHAM1fbA4b5yo,20752
@@ -17,7 +17,7 @@ model_compression_toolkit/core/common/model_builder_mode.py,sha256=jll9-59OPaE3u
 model_compression_toolkit/core/common/model_collector.py,sha256=ofcepKtxc3j2Ouz6BpAKXTzPgjABnpRP47ndmJCXAkk,8352
 model_compression_toolkit/core/common/model_validation.py,sha256=LaG8wd6aZl0OJgieE3SeiVDEPxtk8IHq9-3wSnmWhY4,1214
 model_compression_toolkit/core/common/node_prior_info.py,sha256=WXX_PrGVG9M9I_REG5ZzFBohwmV4yf356sZnrja_FLo,2832
-model_compression_toolkit/core/common/similarity_analyzer.py,sha256=98l9ttnXHf6VYxBW4852h2CPJKg3A6nLOovpHn-tnKs,8560
+model_compression_toolkit/core/common/similarity_analyzer.py,sha256=5av6qDKNDJDHg0p387oOxemxvp2xkfjzB_QNaSHN6po,9199
 model_compression_toolkit/core/common/user_info.py,sha256=dSRMnT-oewmdOziIpEuW-s9K7vTSeyUBxT4z9neXurI,1648
 model_compression_toolkit/core/common/back2framework/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/common/back2framework/base_model_builder.py,sha256=V1oShKzbSkdcTvREn8VnQQBzvm-tTHkWMXqMkYozF2s,2023
@@ -31,7 +31,7 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
 model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
 model_compression_toolkit/core/common/graph/base_graph.py,sha256=06mvCb_HHA5iIOdQ31a-nimhrpSA-jYnuV1Ir76QGa8,38259
-model_compression_toolkit/core/common/graph/base_node.py,sha256=jPYpf6sci8LswatxTyygD8ZM5OvsCnxBEWsSl-g64wI,28492
+model_compression_toolkit/core/common/graph/base_node.py,sha256=38-4iyOdiuWBD3eZtP7T74NYtLuqLaEj_cQZbAFHpG0,28499
 model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
 model_compression_toolkit/core/common/graph/functional_node.py,sha256=RgwWAoMX7YV5c2gZdTBSX-ziTh3OLbebZXr3jitkxDs,3173
 model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
@@ -45,10 +45,10 @@ model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256
 model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=gRmBEFRmyJsNKezQfiwDwQu1cmbGd2wgKCRTH6iw8mw,3961
 model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=gw4av_rzn_3oEAPpD3B7PHZDqnxHMjIESevl6ppPnkk,7175
 model_compression_toolkit/core/common/hessian/__init__.py,sha256=bxPVbkIlHFJMiOgTdWMVCqcD9JKV5kb2bVdWUTeLpj8,1021
-model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=8B-B5G_0ukNq6ICQNyMUuopSD8viWa72mUPXF3zFlFM,9721
+model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=wUmyekByJIMjupAb4qttVQHsv2pJ1ydDg17U8d5azWE,9660
 model_compression_toolkit/core/common/hessian/hessian_info_utils.py,sha256=FpXQvJmhiF6PAWX9M_0XZ2Qe8Wv8bXcv0Sj3si5YIjQ,1325
 model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py,sha256=bWxavhwDrSHTQPQclUzzW_Q3FVgKEtwrnD7a9lmHNbo,4379
-model_compression_toolkit/core/common/hessian/trace_hessian_request.py,sha256=EvdZFWlpkN9pBqWZ7jReWHIN0FTUy-9x5KgAErXWwSw,3321
+model_compression_toolkit/core/common/hessian/trace_hessian_request.py,sha256=lgZZgkpCURkMNaipFoRqwsONU74OWmMXSZvh4Dc4aMk,3251
 model_compression_toolkit/core/common/matchers/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
 model_compression_toolkit/core/common/matchers/base_graph_filter.py,sha256=mTk54z0mIbFmPOb4h0xfLtLDookcFyNh8H0pIN5js_M,3091
 model_compression_toolkit/core/common/matchers/base_matcher.py,sha256=JCj-NLAXOJa-GcSX-94PVUTWjooQUd0NemiyNg5uKGQ,2210
@@ -102,23 +102,23 @@ model_compression_toolkit/core/common/quantization/core_config.py,sha256=KYdyfSm
 model_compression_toolkit/core/common/quantization/debug_config.py,sha256=HtkMmneN-EmAzgZK4Vp4M8Sqm5QKdrvNyyZMpaVqYzY,1482
 model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=fwF4VILaX-u3ZaFd81xjbJuhg8Ef-JX_KfMXW0TPV-I,7136
 model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=TCgpvtfyzFUedv4sZ6sKzsTyikaVl2ixLj_aHPSC2r0,27014
-model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=BieZDv9oc-Mc78S_LRMGo-s_2acbqiLE0ewaSE1v2VY,6818
+model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=Y76BZ-X2vE_PXeM9r7D93VsFnbC_evoHhN7zYuvFdzw,7041
 model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=T1nVWdRJfBQ_iuMQYQSIkjfkR-2n3lAOKGAz_rUZZN0,2190
 model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=MwIOBZ4BlZSTIOG75PDvlI3JmZ6t8YjPc1VP9Adei60,3847
 model_compression_toolkit/core/common/quantization/quantize_graph_weights.py,sha256=N005MSvx8UypVpa7XrxNrB2G732n2wHj3RmLyjTgd3I,2728
 model_compression_toolkit/core/common/quantization/quantize_node.py,sha256=cdzGNWfT4MRogIU8ehs0tr3lVjnzAI-jeoS9b4TwVBo,2854
-model_compression_toolkit/core/common/quantization/set_node_quantization_config.py,sha256=9BEv2l0z2trDEsr40VB8tO3ToBA_b2sd_jH9uqZ5Wo8,11503
+model_compression_toolkit/core/common/quantization/set_node_quantization_config.py,sha256=O4qFJw3nBYUD4cGbO8haGXZ2-piSqoRpDKDD74iXSxw,12417
 model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py,sha256=eCDGwsWYLU6z7qbEVb4TozMW_nd5VEP_iCJ6PcvyEPw,1486
-model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py,sha256=TUJuSpX8pcsIPbJ6z_YGWgD_uafqlKRJcpsTIFpjMKU,19936
-model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py,sha256=HSbAlDKXZMn8BtQQGL8TnlXvO2f_2oTLXAK1khraX7g,7410
+model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py,sha256=4x6rgQ5bCz2kysVkjBXxbb2dNEC9N1S2TE46kOFXU_c,23305
+model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py,sha256=AROE8pZEHmzGNCRoxr5QH2QFYvu1kefSVk6is3fsifI,8027
 model_compression_toolkit/core/common/quantization/quantization_params_generation/outlier_filter.py,sha256=9gnfJV89jpGwAx8ImJ5E9NjCv3lDtbyulP4OtgWb62M,1772
-model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py,sha256=BiwDqt5CeU6CW0Qusy3LwWhFtf2J9BvSuGMsTsG6rSw,8538
+model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py,sha256=ejc_obamUndJsv3F1FuOGMrIibS__qDUbAia1H9vwUM,9487
 model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py,sha256=noEdvGiyyW7acgQ2OFWLedCODibTGYJifC9qo8YIU5U,4558
-model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py,sha256=H2D9rdChIviL_j0mF6zy8Qeu_ZXKRu-hLqckSAT1MR8,4352
+model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py,sha256=7ITrOw5ykncpHNghlPNTaDZExFYrPmhRck4oW0GaPe0,6213
 model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py,sha256=7kt0JB8PQE0SW9kg8fCwZ5mBkHNgiRrn0of4ZQYQN2A,41524
-model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py,sha256=nug6XgsywxYf57XF_Tnt2xwdf0zLLsajiZKEblo4lFc,3882
-model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py,sha256=QtSAtdAb7sTgtoe9L6DnMFO7rjkOtpzE9kD9xmG7eYM,9743
-model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py,sha256=nsaM-AJ6WMUBT31jFIJ2wkYAiGM8qqm9lleMS8AwINI,7933
+model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py,sha256=kAqVKZYu6FHWlC_PUiytsmXdTX1GzO_S5DWrTXuJBjs,4894
+model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py,sha256=_ULwlPvzVL_UcYVlUPjDIeXz_99eW26l9FwGzaUu-_M,10789
+model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py,sha256=VG0UqFOQk_7ALdJsUl1wwwFLjE38DxN6-NRZx161XiY,8902
 model_compression_toolkit/core/common/quantization/quantizers/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/common/quantization/quantizers/lut_kmeans_quantizer.py,sha256=P0x_y18LypBxP2tV9OWizheYfILqvaMC8RwHo04sUpQ,2761
 model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py,sha256=CCFhi5LUIcHCCIzDyORvm0FDZLknrctdNwNlPphOQgI,14245
@@ -135,6 +135,7 @@ model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py,
 model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py,sha256=YqLKiO5gFBEvI6noAWeMME1JHaYUaGFMglVFg8AqGjc,10028
 model_compression_toolkit/core/common/substitutions/linear_collapsing.py,sha256=iEtzbWCDXP6EDkTZCtREQ0rpMxhQ2kM9zlcP_0KLq9I,12367
 model_compression_toolkit/core/common/substitutions/linear_collapsing_substitution.py,sha256=uoauhmncQqUBNvD-qCLIXsIbl_IzrbxSKdxiMig-5W4,2406
+model_compression_toolkit/core/common/substitutions/remove_identity.py,sha256=LjkedR5fnXy4LCEQ7rnVTBI-cTkdDxXtufge5Llj2J0,2038
 model_compression_toolkit/core/common/substitutions/residual_collapsing.py,sha256=doErjlMq-uSObYMSjA6IywSHb3Hz3QCc0HKU68ccrQ4,4767
 model_compression_toolkit/core/common/substitutions/scale_equalization.py,sha256=p57u25qdW2pimxzGwgMXEBV4S-LzXuTVAlIM7830WfU,10966
 model_compression_toolkit/core/common/substitutions/shift_negative_activation.py,sha256=cyy4qnlD-v1Gou62oHNDsf1hWLWkYfcjVv1otFrUltY,29865
@@ -149,7 +150,7 @@ model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7V
 model_compression_toolkit/core/keras/constants.py,sha256=Uv3c0UdW55pIVQNW_1HQlgl-dHXREkltOLyzp8G1mTQ,3163
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=Ha4HTHuiw_KTS5Po1Xnv6GyK9eprpDhYWf-eooS62Ys,4961
-model_compression_toolkit/core/keras/keras_implementation.py,sha256=RS2UEtZ_anZeDxz7Zv6sNv7v9tFVct6d9KVrUlxTGpo,29309
+model_compression_toolkit/core/keras/keras_implementation.py,sha256=7RBALls_V0z18WtkWhVEpjAYmaTZvhMxQaDm4J7nkDc,29457
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
 model_compression_toolkit/core/keras/keras_node_prior_info.py,sha256=Aqh31wOPaiZcJIOm-uJwzev0eTMdJyXaOk97rs4z7BU,3879
 model_compression_toolkit/core/keras/resource_utilization_data_facade.py,sha256=Xmk2ZL5CaYdb7iG62HdtZ1F64vap7ffnrsuR3e3G5hc,4851
@@ -174,6 +175,7 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_co
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py,sha256=kjwlKtm5yhNgWVVcW6mN-hn7enwAnn_8-TUZvxZBiQs,4112
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py,sha256=l9PUREBf4aRwWILiybdteveeUbh7js-i-hLt8Ma0e4c,26771
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/relu_bound_to_power_of_2.py,sha256=IdKOg6AWZWMcmDbOuNdxetS5_zTarXIIffdYL7JTdvk,3872
+model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py,sha256=z2J2Xk7b_w_fEgJmK87lwwBmEoAZpGxPmsBrR24IkZs,2035
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py,sha256=gSqUYh76tP7NcZfqFSnuPIrUpyBh6UjjcPJtJxZtOZk,3181
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py,sha256=ryes9y1ie-vjBGso2TeO4EXxVk69Ew3iSAhshPz1Ou4,5542
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/separableconv_decomposition.py,sha256=TEaHlIbXj_ZjIdT5TmAICD3WLD3u_7g0fLWQcNzTJuM,7941
@@ -211,7 +213,7 @@ model_compression_toolkit/core/pytorch/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKW
 model_compression_toolkit/core/pytorch/constants.py,sha256=NI-J7REuxn06oEIHsmJ4GqtNC3TbV8xlkJjt5Ar-c4U,2626
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=r1XyzUFvrjGcJHQM5ETLsMZIG2yHCr9HMjqf0ti9inw,4175
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=IoMvTch5awAEPvB6Tg6ANhFGXvfSgv7JLsUBlxpMwk4,4330
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=mT4jd8E1saCpAgrsClufQbnVJ0eYn1xaTQ3teALu4jk,27117
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=pDA2hL84XrO0zwAsFxM5a92BO_C2bBEtC9GEo4QaKyM,27267
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=n_B4a6FMwM9D2w8kzy3oenBWZgXNZuIZgTJC6JEuTy0,3250
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=E6ifk1HdO60k4IRH2EFBzAYWtwUlrGqJoQ66nknpHoQ,4983
 model_compression_toolkit/core/pytorch/utils.py,sha256=dRPiteBg2dBNsHwZyYzXiCIAjnelSoeZZsDXlsTw5JQ,2880
@@ -238,6 +240,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py,sha256=VNg-VgzCxSyqy2J3neEPl6U0SPO8UIVU_T47bGhz4FE,38459
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/permute_call_method.py,sha256=EMCviyFyJFLEKuAUz3rZHLfB9MAU1kywSBL2XQNzLlg,1953
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py,sha256=9tI14dWDQkTCgLwVZdqmHxEek5KgYPL3x5fnJWWq7bg,5667
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py,sha256=joHjwiUxccypMHkTy46rI91VyapLn9yJ2YRo5ISnOH4,1987
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py,sha256=jOqlelGhADEZiYUEyYj9oJZ5YLXx8jWNUlVTG6Td79Y,4919
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py,sha256=dwRy3ZZ0qShBEQLknkYUVPtgZsk6rjJ4IXf553mcch8,2902
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py,sha256=XFtU9yuBmoZlX0f0mS6otMPWMk-RcWs94XdvvTNhW8Y,3303
@@ -329,7 +332,7 @@ model_compression_toolkit/exporter/model_wrapper/pytorch/builder/__init__.py,sha
 model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py,sha256=D_mEUK1sb4kY5946oErfw3RC5mfBTVaw3LZRIKWYKcE,4918
 model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py,sha256=4sN5z-6BXrTE5Dp2FX_jKO9ty5iZ2r4RM7XvXtDVLSI,9348
 model_compression_toolkit/gptq/__init__.py,sha256=YKg-tMj9D4Yd0xW9VRD5EN1J5JrmlRbNEF2fOSgodqA,1228
-model_compression_toolkit/gptq/runner.py,sha256=MIg-oBtR1nbHkexySdCJD_XfjRoHSknLotmGBMuD5qM,5924
+model_compression_toolkit/gptq/runner.py,sha256=PQoLK3WhdRuUwZMd1VbtA7KZ9c-zWig_0ShmTtvJSHY,5970
 model_compression_toolkit/gptq/common/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/gptq/common/gptq_config.py,sha256=6xP99B-lK1bwGv3AdqxnW1V51z2VdzQcjvoSgJOmygA,5288
 model_compression_toolkit/gptq/common/gptq_constants.py,sha256=QSm6laLkIV0LYmU0BLtmKp3Fi3SqDfbncFQWOGA1cGU,611
@@ -341,7 +344,7 @@ model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCS
 model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=rbRkF15MYd6nq4G49kcjb_dPTa-XNq9cTkrb93mXawo,6241
 model_compression_toolkit/gptq/keras/gptq_training.py,sha256=zyVcEQzdnNsrIz32U1pqqoi08hzxRdJ2CumaPFGwbDM,19123
 model_compression_toolkit/gptq/keras/graph_info.py,sha256=5IvgGlJlgOmQYmldjdCBv7tuzAoY0HazatG5Pedrg0Q,4639
-model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=CCV9uyaq-qUGDeXL5OgEWFXSiUkerXrNwFVyA1brrKM,14663
+model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=L5yqjkzw_oszL--dV9EjGoXUYmqM9GmDP7kS7_k96xw,14748
 model_compression_toolkit/gptq/keras/quantizer/__init__.py,sha256=-DK1CDXvlsnEbki4lukZLpl6Xrbo91_jcqxXlG5Eg6Q,963
 model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py,sha256=2YU-x4-Q5f6hkUJf0tw6vcwdNwRMHdefrFjhhyHYsvA,4782
 model_compression_toolkit/gptq/keras/quantizer/quant_utils.py,sha256=Vt7Qb8i4JsE4sFtcjpfM4FTXTtfV1t6SwfoNH8a_Iaw,5055
@@ -358,7 +361,7 @@ model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=kDuWw-6zh17wZpYWh4Xa9
 model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
 model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=xkDa62AdIRwv8dEshffALW9Ri66eseEpyUF9taMUKns,16509
 model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=yXJzDd24zfGs2_vfMovxD1WSh1RxXoPxN4GztOf3P5c,3967
-model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=iBLEbLgde6JQNPhJysfT2rl_Sc7-wyoIZnXRAXQWnR0,13065
+model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=bZvrMKN2jFJH9fodtbCCAtKNVXIvlOAnIaxcGov320o,13154
 model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
 model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=TCA1hAc7raPnrjl06sjFtVM4XUtLtuwAhCGX4U3KGZo,4137
 model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
@@ -429,7 +432,7 @@ model_compression_toolkit/target_platform_capabilities/tpc_models/__init__.py,sh
 model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py,sha256=aHoAu5Iye9YVn2HLwNb4X9cUDX1WJt20R5GsNGIAk9E,3337
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py,sha256=fPOzybGECCWPkAD1hmJryWZrf9vd5Od-UOH6PE0lH94,3820
-model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py,sha256=v1eush7kGZ_Pdl8iyIVkKIqCmix2afiuPZDMgm6kBrE,1522
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py,sha256=F5RG4MnuAwKcNXbfVbPFLQu30-lNax-7knqu20B6udQ,1522
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/__init__.py,sha256=1mMOREEMoNHu_KTMGDp4crN61opKWX6aFn1DrDLvqcc,717
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py,sha256=S-GwMI-JiuPpbtOdd6TSOEjiUFiIs6M2RAiJNJ3O950,10883
 model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py,sha256=bPBWxopMUHFgiaJjaAfoompwShvfH2wHAouN56PQn0A,6484
@@ -480,8 +483,8 @@ model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha
 model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
 model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
 model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=7bbzqJN8ZAycVDvZr_5xC-niTAR5df8f03Kooev_pfg,3047
-mct_nightly-2.0.0.20240417.406.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-mct_nightly-2.0.0.20240417.406.dist-info/METADATA,sha256=wDYGNbzlScIweXxmrfcYA9RSLM_OaB2fYaIsx28fm-Y,18795
-mct_nightly-2.0.0.20240417.406.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-mct_nightly-2.0.0.20240417.406.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.0.0.20240417.406.dist-info/RECORD,,
+mct_nightly-2.0.0.20240418.439.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.0.0.20240418.439.dist-info/METADATA,sha256=ES0claumrC9y2bX7XAFj8RD6nZNBClpiLEVCOphlRxE,18795
+mct_nightly-2.0.0.20240418.439.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+mct_nightly-2.0.0.20240418.439.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.0.0.20240418.439.dist-info/RECORD,,

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.0.0.20240417.000406"
+__version__ = "2.0.0.20240418.000439"

model_compression_toolkit/constants.py CHANGED Viewed

@@ -97,6 +97,8 @@ UPPER_FACTOR = 1.2
 DEC_RANGE_BOTTOM = 0.97
 DEC_RANGE_UPPER = 1.03
+NUM_QPARAM_HESSIAN_SAMPLES = 16
 # Resource utilization computation parameters
 BITS_TO_BYTES = 8.0

model_compression_toolkit/core/common/graph/base_node.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 import copy
-from typing import Dict, Any, Tuple, List, Type
+from typing import Dict, Any, Tuple, List, Type, Union
 import numpy as np

model_compression_toolkit/core/common/hessian/hessian_info_service.py CHANGED Viewed

@@ -17,7 +17,6 @@ from functools import partial
 from typing import Callable, List
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
-from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.hessian.trace_hessian_request import TraceHessianRequest
 from model_compression_toolkit.logger import Logger
@@ -38,7 +37,7 @@ class HessianInfoService:
     """
     def __init__(self,
-                 graph: Graph,
+                 graph,
                  representative_dataset: Callable,
                  fw_impl,
                  num_iterations_for_approximation: int = HESSIAN_NUM_ITERATIONS
@@ -151,7 +150,7 @@ class HessianInfoService:
         if required_size==0:
             return []
-        Logger.info(f"Ensuring {required_size} Hessian-trace approximation for node {trace_hessian_request.target_node}.")
+        Logger.info(f"\nEnsuring {required_size} Hessian-trace approximation for node {trace_hessian_request.target_node}.")
         # Replace request of a reused target node with a request of the 'reuse group'.
         if trace_hessian_request.target_node.reuse_group:

model_compression_toolkit/core/common/hessian/trace_hessian_request.py CHANGED Viewed

@@ -16,8 +16,6 @@ from typing import List
 from enum import Enum
-from model_compression_toolkit.core.common import BaseNode
 class HessianMode(Enum):
     """
@@ -54,7 +52,7 @@ class TraceHessianRequest:
     def __init__(self,
                  mode: HessianMode,
                  granularity: HessianInfoGranularity,
-                 target_node: BaseNode,
+                 target_node,
                  ):
         """
         Attributes:

model_compression_toolkit/core/common/quantization/quantization_config.py CHANGED Viewed

@@ -26,14 +26,16 @@ class QuantizationErrorMethod(Enum):
     NOCLIPPING - Use min/max values as thresholds.
-    MSE - Use min square error for minimizing quantization noise.
+    MSE - Use mean square error for minimizing quantization noise.
-    MAE - Use min absolute error for minimizing quantization noise.
+    MAE - Use mean absolute error for minimizing quantization noise.
     KL - Use KL-divergence to make signals distributions to be similar as possible.
     Lp - Use Lp-norm to minimizing quantization noise.
+    HMSE - Use Hessian-based mean squared error for minimizing quantization noise. This method is using Hessian scores to factorize more valuable parameters when computing the error induced by quantization.
     """
     NOCLIPPING = 0
@@ -41,6 +43,7 @@ class QuantizationErrorMethod(Enum):
     MAE = 2
     KL = 4
     LP = 5
+    HMSE = 6
 class QuantizationConfig:

model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py CHANGED Viewed

@@ -13,13 +13,16 @@
 # limitations under the License.
 # ==============================================================================
 from copy import deepcopy
-from typing import Tuple, Callable
+from typing import Tuple, Callable, List
 import numpy as np
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
+from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianMode, HessianInfoGranularity, \
+    HessianInfoService
 from model_compression_toolkit.core.common.similarity_analyzer import compute_mse, compute_mae, compute_lp_norm
 from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
-from model_compression_toolkit.constants import FLOAT_32
-from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import uniform_quantize_tensor
+from model_compression_toolkit.constants import FLOAT_32, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import uniform_quantize_tensor, \
+    reshape_tensor_for_per_channel_search
 def _mse_error_histogram(q_bins: np.ndarray,
@@ -371,13 +374,63 @@ def _get_sliced_histogram(bins: np.ndarray,
     return bins_subset, counts_subset
+def _compute_hessian_for_hmse(node,
+                              hessian_info_service: HessianInfoService,
+                              num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> List[np.ndarray]:
+    """
+    Compute and retrieve Hessian-based scores for using during HMSE error computation.
+    Args:
+        node: The node to compute Hessian-based scores for.
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on.
+    Returns: A list with computed Hessian-based scores tensors for the given node.
+    """
+    _request = TraceHessianRequest(mode=HessianMode.WEIGHTS,
+                                   granularity=HessianInfoGranularity.PER_ELEMENT,
+                                   target_node=node)
+    _scores_for_node = hessian_info_service.fetch_hessian(_request,
+                                                          required_size=num_hessian_samples)
+    return _scores_for_node
+def _hmse_error_function_wrapper(float_tensor: np.ndarray,
+                                 fxp_tensor: np.ndarray,
+                                 axis: int,
+                                 norm: bool,
+                                 hessian_scores: np.ndarray):
+    """
+    This function wraps the HMSE error method to enable using it during parameters selection.
+    Args:
+        float_tensor: Float tensor.
+        fxp_tensor: Quantized tensor.
+        axis: Axis along which the operation has been performed. If not None, then per-channel computation is expected.
+        norm: Indicates whether to normalize the result of the error function.
+        hessian_scores: A tensor with Hessian-based scores to use for Hessian-based MSE (HMSE) error computation.
+    Returns: The HMSE error between the float and fixed-point tensors.
+    """
+    if axis is not None:
+        hessian_scores = reshape_tensor_for_per_channel_search(hessian_scores, 0)
+    return compute_mse(float_tensor, fxp_tensor, axis, norm, weights=hessian_scores)
 def get_threshold_selection_tensor_error_function(quantization_method: QuantizationMethod,
                                                   quant_error_method: qc.QuantizationErrorMethod,
                                                   p: int,
                                                   axis: int = None,
                                                   norm: bool = False,
                                                   n_bits: int = 8,
-                                                  signed: bool = True) -> Callable:
+                                                  signed: bool = True,
+                                                  node=None,
+                                                  hessian_info_service: HessianInfoService = None,
+                                                  num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Callable:
     """
     Returns the error function compatible to the provided threshold method,
     to be used in the threshold optimization search for tensor quantization.
@@ -389,6 +442,9 @@ def get_threshold_selection_tensor_error_function(quantization_method: Quantizat
         norm: Indicates whether to normalize the result of the error function.
         n_bits: Number of bits used to quantize the tensor.
         signed: Indicates whether the input is signed.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns: a Callable method that calculates the error between a tensor and a quantized tensor.
     """
@@ -418,6 +474,13 @@ def get_threshold_selection_tensor_error_function(quantization_method: Quantizat
                                                                           n_bits=n_bits,
                                                                           per_channel=True)
+    if quant_error_method == qc.QuantizationErrorMethod.HMSE:
+        node_hessian_scores = _compute_hessian_for_hmse(node, hessian_info_service, num_hessian_samples)
+        node_hessian_scores = np.sqrt(np.mean(node_hessian_scores, axis=0))
+        return lambda x, y, threshold: _hmse_error_function_wrapper(x, y, norm=norm, axis=axis,
+                                                                    hessian_scores=node_hessian_scores)
     quant_method_error_function_mapping = {
         qc.QuantizationErrorMethod.MSE: lambda x, y, threshold: compute_mse(x, y, norm=norm, axis=axis),
         qc.QuantizationErrorMethod.MAE: lambda x, y, threshold: compute_mae(x, y, norm=norm, axis=axis),

model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py CHANGED Viewed

@@ -18,7 +18,8 @@ from sklearn.cluster import KMeans
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
 from model_compression_toolkit.constants import LUT_VALUES, MIN_THRESHOLD, SCALE_PER_CHANNEL, \
-    LUT_VALUES_BITWIDTH, THRESHOLD
+    LUT_VALUES_BITWIDTH, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import \
     max_power_of_two, int_quantization_with_threshold
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.symmetric_selection import \
@@ -37,7 +38,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
                       n_iter: int = 10,
                       min_threshold: float = MIN_THRESHOLD,
                       quant_error_method: qc.QuantizationErrorMethod = None,
-                      is_symmetric=False) -> dict:
+                      is_symmetric=False,
+                      node=None,
+                      hessian_info_service: HessianInfoService = None,
+                      num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> dict:
     """
     The quantizer first finds the closest max value per channel of tensor_data.
     Now, we divide tensor_data with the threshold vector per channel. In addition, we scale the result to the range
@@ -53,7 +57,10 @@ def lut_kmeans_tensor(tensor_data: np.ndarray,
         n_iter: Number of iterations to search for the optimal threshold.
         min_threshold: Minimal threshold to choose when the computed one is smaller.
         quant_error_method: an error function to optimize the parameters' selection accordingly (not used for this method).
-        is_symmetric (bool): Whether to apply symmetric weight quantization (default is False, meaning power of 2 quantization)
+        is_symmetric (bool): Whether to apply symmetric weight quantization (default is False, meaning power of 2 quantization).
+        node: The node for which the quantization error is computed (not used for this method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (not used for this method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (not used for this method).
     Returns:
         A dictionary containing the cluster assignments according to the k-means algorithm,

model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py CHANGED Viewed

@@ -15,7 +15,8 @@
 import numpy as np
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD
+from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
     qparams_selection_tensor_search, qparams_selection_histogram_search
 from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import max_power_of_two, get_tensor_max
@@ -31,7 +32,11 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
                                   channel_axis: int = 1,
                                   n_iter: int = 10,
                                   min_threshold: float = MIN_THRESHOLD,
-                                  quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                                  quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
+                                  node=None,
+                                  hessian_info_service: HessianInfoService = None,
+                                  num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES,
+                                  ) -> dict:
     """
     Compute the power of two threshold based on the provided QuantizationErrorMethod to quantize the tensor.
     A different search is applied, depending on the value of the selected QuantizationErrorMethod.
@@ -45,6 +50,9 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the parameters' selection accordingly.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns:
         Power of two threshold to quantize the tensor in a power of 2 manner.
@@ -57,8 +65,10 @@ def power_of_two_selection_tensor(tensor_data: np.ndarray,
         signed = True  # weights are always signed
         axis = -1 if per_channel else None
         error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.POWER_OF_TWO,
-                                                                       quant_error_method, p, axis=axis, norm=False, n_bits=n_bits,
-                                                                       signed=signed)
+                                                                       quant_error_method, p, axis=axis, norm=False,
+                                                                       n_bits=n_bits, signed=signed, node=node,
+                                                                       hessian_info_service=hessian_info_service,
+                                                                       num_hessian_samples=num_hessian_samples)
         threshold = qparams_selection_tensor_search(error_function,
                                                     tensor_data,
                                                     n_bits,

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py CHANGED Viewed

@@ -12,10 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 from tqdm import tqdm
 from typing import List
+from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core import QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_activations_computation \
     import get_activations_qparams
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_weights_computation import \
@@ -25,7 +30,9 @@ from model_compression_toolkit.logger import Logger
 def calculate_quantization_params(graph: Graph,
                                   nodes: List[BaseNode] = [],
-                                  specific_nodes: bool = False):
+                                  specific_nodes: bool = False,
+                                  hessian_info_service: HessianInfoService = None,
+                                  num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES):
     """
     For a graph, go over its nodes, compute quantization params (for both weights and activations according
     to the given framework info), and create and attach a NodeQuantizationConfig to each node (containing the
@@ -39,6 +46,8 @@ def calculate_quantization_params(graph: Graph,
         graph: Graph to compute its nodes' thresholds.
         nodes: List of nodes to compute their thresholds instead of computing it for all nodes in the graph.
         specific_nodes: Flag to compute thresholds for only specific nodes.
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     """
@@ -60,10 +69,28 @@ def calculate_quantization_params(graph: Graph,
                         output_channels_axis = channels_axis[0]
                     else:
                         output_channels_axis = None
+                    mod_attr_cfg = attr_cfg
+                    if attr_cfg.weights_error_method == QuantizationErrorMethod.HMSE:
+                        kernel_attr_name = graph.fw_info.get_kernel_op_attributes(n.type)
+                        if len(kernel_attr_name) > 0:
+                            kernel_attr_name = kernel_attr_name[0]
+                        if kernel_attr_name is None or kernel_attr_name not in attr:
+                            Logger.warning(f"The HMSE error method for parameters selection is only supported for "
+                                           f"kernel weights attributes. Running parameters selection for attribute "
+                                           f"'{attr}' in node '{n.name}' with the default MSE error method instead.")
+                            mod_attr_cfg = copy.deepcopy(attr_cfg)
+                            mod_attr_cfg.weights_error_method = QuantizationErrorMethod.MSE
                     weights_params = get_weights_qparams(n.get_weights_by_keys(attr),
                                                          candidate_qc.weights_quantization_cfg,
-                                                         attr_cfg,
-                                                         output_channels_axis)
+                                                         mod_attr_cfg,
+                                                         output_channels_axis,
+                                                         node=n,
+                                                         hessian_info_service=hessian_info_service,
+                                                         num_hessian_samples=num_hessian_samples)
                     attr_cfg.set_weights_quantization_param(weights_params)
             if n.is_activation_quantization_enabled():

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py CHANGED Viewed

@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Dict, Any, Tuple
+from typing import Dict, Any
 import numpy as np
-from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.constants import NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.defaultdict import DefaultDict
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
@@ -27,31 +28,40 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config
 dummy_channel_mapping = DefaultDict(default_value=(None, None))
-def get_weights_qparams(kernel: np.ndarray,
+def get_weights_qparams(weights_attr_values: np.ndarray,
                         weights_quant_config: NodeWeightsQuantizationConfig,
                         attr_quant_config: WeightsAttrQuantizationConfig,
-                        output_channels_axis: int) -> Dict[Any, Any]:
+                        output_channels_axis: int,
+                        node=None,
+                        hessian_info_service: HessianInfoService = None,
+                        num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> Dict[Any, Any]:
     """
     Compute thresholds to quantize a kernel according to a NodeWeightsQuantizationConfig
     instance.
     Args:
-        kernel: Kernel to compute the quantization thresholds to.
+        weights_attr_values: Weights attribute parameter to compute the quantization thresholds for.
         weights_quant_config: Weights quantization configuration to define how the thresholds are computed.
         attr_quant_config: A specific weights attribute quantization configuration to get its params.
         output_channels_axis: Index of the kernel output channels dimension.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns:
         A dictionary with the quantization threshold of the kernel.
     """
     if attr_quant_config.weights_quantization_params_fn is not None:
-        weights_params = attr_quant_config.weights_quantization_params_fn(kernel,
+        weights_params = attr_quant_config.weights_quantization_params_fn(weights_attr_values,
                                                                           p=attr_quant_config.l_p_value,
                                                                           n_bits=attr_quant_config.weights_n_bits,
                                                                           per_channel=attr_quant_config.weights_per_channel_threshold and output_channels_axis is not None,
                                                                           channel_axis=output_channels_axis,
                                                                           min_threshold=weights_quant_config.min_threshold,
-                                                                          quant_error_method=attr_quant_config.weights_error_method)
+                                                                          quant_error_method=attr_quant_config.weights_error_method,
+                                                                          node=node,
+                                                                          hessian_info_service=hessian_info_service,
+                                                                          num_hessian_samples=num_hessian_samples)
     else:
         weights_params = {}

model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py CHANGED Viewed

@@ -15,7 +15,8 @@
 import numpy as np
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD
+from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.error_functions import \
     get_threshold_selection_tensor_error_function, get_threshold_selection_histogram_error_function, _kl_error_histogram
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
@@ -33,7 +34,10 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
                                channel_axis: int = 1,
                                n_iter: int = 10,
                                min_threshold: float = MIN_THRESHOLD,
-                               quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                               quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
+                               node=None,
+                               hessian_info_service: HessianInfoService = None,
+                               num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> dict:
     """
     Compute the optimal threshold based on the provided QuantizationErrorMethod to quantize the tensor.
     A different search is applied, depending on the value of the selected QuantizationErrorMethod.
@@ -47,6 +51,9 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the parameters' selection accordingly.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns:
         Optimal threshold to quantize the tensor in a symmetric manner.
@@ -59,7 +66,11 @@ def symmetric_selection_tensor(tensor_data: np.ndarray,
     else:
         signed = True  # weights are always signed
         axis = -1 if per_channel else None
-        error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.SYMMETRIC, quant_error_method, p, axis=axis, norm=False, n_bits=n_bits, signed=signed)
+        error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.SYMMETRIC, quant_error_method,
+                                                                       p, axis=axis, norm=False, n_bits=n_bits,
+                                                                       signed=signed, node=node,
+                                                                       hessian_info_service=hessian_info_service,
+                                                                       num_hessian_samples=num_hessian_samples)
         threshold = qparams_symmetric_selection_tensor_search(error_function,
                                                               tensor_data,
                                                               tensor_max,

model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py CHANGED Viewed

@@ -15,7 +15,8 @@
 import numpy as np
 import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX
+from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
     qparams_uniform_selection_tensor_search, qparams_uniform_selection_histogram_search
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.error_functions import \
@@ -31,7 +32,10 @@ def uniform_selection_tensor(tensor_data: np.ndarray,
                              channel_axis: int = 1,
                              n_iter: int = 10,
                              min_threshold: float = MIN_THRESHOLD,
-                             quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
+                             quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
+                             node=None,
+                             hessian_info_service: HessianInfoService = None,
+                             num_hessian_samples: int = NUM_QPARAM_HESSIAN_SAMPLES) -> dict:
     """
     Compute the optimal quantization range based on the provided QuantizationErrorMethod
     to uniformly quantize the tensor.
@@ -46,6 +50,9 @@ def uniform_selection_tensor(tensor_data: np.ndarray,
         n_iter: Number of iterations to search for the optimal threshold (not used for this method).
         min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
         quant_error_method: an error function to optimize the range parameters' selection accordingly.
+        node: The node for which the quantization error is computed (used only with HMSE error method).
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores (used only with HMSE error method).
+        num_hessian_samples: Number of samples to approximate Hessian-based scores on (used only with HMSE error method).
     Returns:
         Optimal quantization range to quantize the tensor uniformly.
@@ -57,7 +64,10 @@ def uniform_selection_tensor(tensor_data: np.ndarray,
         mm = tensor_min, tensor_max
     else:
         axis = -1 if per_channel else None
-        error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.UNIFORM, quant_error_method, p, axis=axis, norm=False)
+        error_function = get_threshold_selection_tensor_error_function(QuantizationMethod.UNIFORM, quant_error_method,
+                                                                       p, axis=axis, norm=False, node=node,
+                                                                       hessian_info_service=hessian_info_service,
+                                                                       num_hessian_samples=num_hessian_samples)
         mm = qparams_uniform_selection_tensor_search(error_function,
                                                      tensor_data,
                                                      tensor_min,

model_compression_toolkit/core/common/quantization/set_node_quantization_config.py CHANGED Viewed

@@ -24,7 +24,8 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
 from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig
-from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
+from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig, \
+    QuantizationErrorMethod
 from model_compression_toolkit.core.common.quantization.quantization_params_fn_selection import \
     get_activation_quantization_params_fn, get_weights_quantization_params_fn
 from model_compression_toolkit.core.common.quantization.quantization_fn_selection import \
@@ -36,19 +37,31 @@ from model_compression_toolkit.target_platform_capabilities.target_platform.op_q
 def set_quantization_configuration_to_graph(graph: Graph,
                                             quant_config: QuantizationConfig,
-                                            mixed_precision_enable: bool = False) -> Graph:
+                                            mixed_precision_enable: bool = False,
+                                            running_gptq: bool = False) -> Graph:
     """
     Add quantization configuration for each graph node.
     Args:
         graph: Graph for which to add quantization info to each node.
         quant_config: Quantization configuration containing parameters for how the graph should be quantized.
-        mixed_precision_enable: is mixed precision enabled
+        mixed_precision_enable: is mixed precision enabled.
+        running_gptq: Whether or not a GPTQ optimization is planned to run after the PTQ process.
     Returns:
         The graph with quantization configurations attached to each node in it.
     """
+    if quant_config.weights_error_method == QuantizationErrorMethod.HMSE:
+        if not running_gptq:
+            Logger.warning(f"The HMSE error method for parameters selection is only supported when running GPTQ "
+                           f"optimization due to long execution time that is not suitable for basic PTQ. "
+                           f"Using the default MSE error method instead.")
+            quant_config.weights_error_method = QuantizationErrorMethod.MSE
+        else:
+            Logger.warning("Using the HMSE error method for weights quantization parameters search. "
+                           "Note: This method may significantly increase runtime during the parameter search process.")
     for n in graph.nodes:
         set_quantization_configs_to_node(node=n,
                                          quant_config=quant_config,

model_compression_toolkit/core/common/similarity_analyzer.py CHANGED Viewed

@@ -18,6 +18,8 @@ from typing import Any
 import numpy as np
 from model_compression_toolkit.constants import EPS
+from model_compression_toolkit.logger import Logger
 #########################
 #  Helpful functions
@@ -87,7 +89,8 @@ def compute_mse(float_tensor: np.ndarray,
                 norm: bool = False,
                 norm_eps: float = 1e-8,
                 batch: bool = False,
-                axis: int = None) -> float:
+                axis: int = None,
+                weights: np.ndarray = None) -> float:
     """
     Compute the mean square error between two numpy arrays.
@@ -98,6 +101,7 @@ def compute_mse(float_tensor: np.ndarray,
         norm_eps: epsilon value for error normalization stability.
         batch: Whether to run batch similarity analysis or not.
         axis: Axis along which the operator has been computed.
+        weights: Weights tensor to use for the Weighted-MSE error computation.
     Returns:
         The MSE distance between the two tensors.
@@ -107,7 +111,15 @@ def compute_mse(float_tensor: np.ndarray,
     float_flat = flatten_tensor(float_tensor, batch, axis)
     fxp_flat = flatten_tensor(fxp_tensor, batch, axis)
-    error = ((float_flat - fxp_flat) ** 2).mean(axis=-1)
+    if weights is not None:
+        w_flat = flatten_tensor(weights, batch, axis)
+        if w_flat.shape != float_flat.shape:
+            Logger.critical(f"Shape mismatch: The shape of the weights tensor {weights.shape} does not match the shape "
+                            f"of the input tensors {float_flat.shape} for Weighted-MSE computation.")  # pragma: no cover
+        error = ((w_flat * (float_flat - fxp_flat)) ** 2).mean(axis=-1)
+    else:
+        error = ((float_flat - fxp_flat) ** 2).mean(axis=-1)
     if norm:
         error /= ((float_flat ** 2).mean(axis=-1) + norm_eps)

model_compression_toolkit/core/common/substitutions/remove_identity.py ADDED Viewed

@@ -0,0 +1,48 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+def remove_identity_node(graph: Graph,
+                         node: BaseNode) -> Graph:
+    """
+    Performs the substitution of the identity node by
+    reconnecting its input directly to its output, effectively removing the node
+    from the graph.
+    Args:
+        graph: The current graph of operations where the node resides.
+        node: The specific `BaseNode` that is matched to be an Identity operation.
+    Returns:
+        Graph: The updated graph after removing the identity node.
+    """
+    # Retrieve the predecessor nodes of the identity node.
+    prev_identity_nodes = graph.get_prev_nodes(node)
+    # Ensure there is exactly one predecessor; otherwise, do nothing.
+    if len(prev_identity_nodes) != 1:
+        return graph
+    # Reconnect the output edges of the identity node to its predecessor,
+    # effectively bypassing the identity node.
+    graph.reconnect_out_edges(current_node=node, new_node=prev_identity_nodes[0])
+    # Remove the edge from the predecessor to the identity node.
+    graph.remove_edge(prev_identity_nodes[0], node)
+    # Remove the identity node from the graph.
+    graph.remove_node(node_to_remove=node)
+    return graph

model_compression_toolkit/core/graph_prep_runner.py CHANGED Viewed

@@ -39,7 +39,8 @@ def graph_preparation_runner(in_model: Any,
                              fw_impl: FrameworkImplementation,
                              tpc: TargetPlatformCapabilities,
                              tb_w: TensorboardWriter = None,
-                             mixed_precision_enable: bool = False) -> Graph:
+                             mixed_precision_enable: bool = False,
+                             running_gptq: bool = False) -> Graph:
     """
     Runs all required preparations in order to build a quantization graph from the given model,
     quantization configuration and target platform specifications.
@@ -59,6 +60,7 @@ def graph_preparation_runner(in_model: Any,
             the attached framework operator's information.
         tb_w: TensorboardWriter object for logging.
         mixed_precision_enable: is mixed precision enabled.
+        running_gptq: Whether or not a GPTQ optimization is planned to run after the PTQ process.
     Returns:
         An internal graph representation of the input model.
@@ -79,7 +81,8 @@ def graph_preparation_runner(in_model: Any,
                                             fw_info,
                                             tb_w,
                                             fw_impl,
-                                            mixed_precision_enable=mixed_precision_enable)
+                                            mixed_precision_enable=mixed_precision_enable,
+                                            running_gptq=running_gptq)
     return transformed_graph
@@ -90,7 +93,8 @@ def get_finalized_graph(initial_graph: Graph,
                         fw_info: FrameworkInfo = None,
                         tb_w: TensorboardWriter = None,
                         fw_impl: FrameworkImplementation = None,
-                        mixed_precision_enable: bool = False) -> Graph:
+                        mixed_precision_enable: bool = False,
+                        running_gptq: bool = False) -> Graph:
     """
     Applies all edit operations (edit, substitutions, etc.) on the model's graph, to prepare it for the quantization
     process. All future graph substitutions and operations that change the graph should be added to this method.
@@ -105,6 +109,7 @@ def get_finalized_graph(initial_graph: Graph,
         tb_w (TensorboardWriter): TensorboardWriter object to use for logging events such as graphs, histograms, etc.
         fw_impl (FrameworkImplementation): FrameworkImplementation object with a specific framework methods implementation.
         mixed_precision_enable: is mixed precision enabled.
+        running_gptq: Whether or not a GPTQ optimization is planned to run after the PTQ process.
     Returns: Graph object that represents the model, after applying all required modifications to it.
     """
@@ -142,7 +147,8 @@ def get_finalized_graph(initial_graph: Graph,
     ######################################
     transformed_graph = set_quantization_configuration_to_graph(graph=transformed_graph,
                                                                 quant_config=quant_config,
-                                                                mixed_precision_enable=mixed_precision_enable)
+                                                                mixed_precision_enable=mixed_precision_enable,
+                                                                running_gptq=running_gptq)
     ######################################
     # Layer fusing

model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py ADDED Viewed

@@ -0,0 +1,51 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import keras
+import tensorflow as tf
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+from model_compression_toolkit.core.common.substitutions.remove_identity import remove_identity_node
+class RemoveIdentity(common.BaseSubstitution):
+    """
+    Graph substitution that removes Identity nodes from a Keras graph.
+    A node is matched when its operation is either the `keras.layers.Identity`
+    layer or the `tf.identity` function.
+    """
+    def __init__(self):
+        """
+        Build the node matcher and register it with the base substitution.
+        """
+        # One matcher per identity form; the two are combined with `|`.
+        nodes = NodeOperationMatcher(keras.layers.Identity) | NodeOperationMatcher(tf.identity)
+        super().__init__(matcher_instance=nodes)
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        The method to perform the substitution of the identity keras node by
+        reconnecting its input directly to its output, effectively removing the node
+        from the graph. The actual graph editing is delegated to the shared
+        `remove_identity_node` helper from the common substitutions package.
+        Args:
+            graph: The current graph of operations where the node resides.
+            node: The specific `BaseNode` that is matched to be an Identity operation.
+        Returns:
+            Graph: The graph returned by `remove_identity_node` after handling the identity node.
+        """
+        return remove_identity_node(graph, node)

model_compression_toolkit/core/keras/keras_implementation.py CHANGED Viewed

@@ -22,6 +22,7 @@ from tensorflow.keras.models import Model
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
 from model_compression_toolkit.core.common.hessian import TraceHessianRequest, HessianMode, HessianInfoService
+from model_compression_toolkit.core.keras.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.keras.hessian.activation_trace_hessian_calculator_keras import \
     ActivationTraceHessianCalculatorKeras
 from model_compression_toolkit.core.keras.hessian.weights_trace_hessian_calculator_keras import WeightsTraceHessianCalculatorKeras
@@ -246,7 +247,8 @@ class KerasImplementation(FrameworkImplementation):
                 MatmulToDenseSubstitution(),
                 MultiHeadAttentionDecomposition(),
                 ActivationDecomposition(),
-                DwconvToConv()]
+                DwconvToConv(),
+                RemoveIdentity()]
     def get_substitutions_pre_statistics_collection(self, quant_config: QuantizationConfig) -> \
             List[common.BaseSubstitution]:

model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py ADDED Viewed

@@ -0,0 +1,50 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import torch
+from model_compression_toolkit.core.common.substitutions.remove_identity import remove_identity_node
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+class RemoveIdentity(common.BaseSubstitution):
+    """
+    Graph substitution that removes `torch.nn.Identity` layers from the graph.
+    A node is matched when its operation is `torch.nn.Identity`.
+    """
+    def __init__(self):
+        """
+        Build the node matcher and register it with the base substitution.
+        """
+        # Match only nodes whose operation is the `torch.nn.Identity` module.
+        nodes = NodeOperationMatcher(torch.nn.Identity)
+        super().__init__(matcher_instance=nodes)
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        The method to perform the substitution of the `torch.nn.Identity` node by
+        reconnecting its input directly to its output, effectively removing the node
+        from the graph. The actual graph editing is delegated to the shared
+        `remove_identity_node` helper from the common substitutions package.
+        Args:
+            graph: The current graph of operations where the node resides.
+            node: The specific `BaseNode` that is matched to be an Identity operation.
+        Returns:
+            Graph: The graph returned by `remove_identity_node` after handling the identity node.
+        """
+        return remove_identity_node(graph, node)

model_compression_toolkit/core/pytorch/pytorch_implementation.py CHANGED Viewed

@@ -58,6 +58,7 @@ from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.co
     FunctionalConvSubstitution
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.relu_bound_to_power_of_2 import \
     ReLUBoundToPowerOfTwo
+from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.reshape_with_static_shapes import \
     ReshapeWithStaticShapes
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.residual_collapsing import \
@@ -238,7 +239,8 @@ class PytorchImplementation(FrameworkImplementation):
                 PermuteCallMethod(),
                 FunctionalConvSubstitution(fw_info),
                 FunctionalBatchNorm(),
-                FunctionalLayerNorm()]
+                FunctionalLayerNorm(),
+                RemoveIdentity()]
     def get_substitutions_pre_statistics_collection(self,
                                                     quant_config: QuantizationConfig

model_compression_toolkit/core/quantization_prep_runner.py CHANGED Viewed

@@ -21,6 +21,7 @@ from tqdm import tqdm
 from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.model_collector import ModelCollector
 from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
@@ -38,7 +39,8 @@ def quantization_preparation_runner(graph: Graph,
                                     core_config: CoreConfig,
                                     fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation,
-                                    tb_w: TensorboardWriter = None) -> Graph:
+                                    tb_w: TensorboardWriter = None,
+                                    hessian_info_service: HessianInfoService = None,) -> Graph:
     """
     Prepares a trained model for post-training quantization.
     First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).
@@ -55,6 +57,7 @@ def quantization_preparation_runner(graph: Graph,
             groups of layers by how they should be quantized, etc.).
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         tb_w: TensorboardWriter object for logging
+        hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.
     Returns:
         Graph object that represents the model, contains thresholds, and ready for quantization.
@@ -86,7 +89,8 @@ def quantization_preparation_runner(graph: Graph,
     ######################################
     # Calculate quantization params
     ######################################
-    calculate_quantization_params(graph)
+    calculate_quantization_params(graph, hessian_info_service=hessian_info_service)
     if tb_w is not None:
         tb_w.add_graph(graph, 'thresholds_selection')

model_compression_toolkit/core/runner.py CHANGED Viewed

@@ -48,6 +48,7 @@ def core_runner(in_model: Any,
                 fw_impl: FrameworkImplementation,
                 tpc: TargetPlatformCapabilities,
                 target_resource_utilization: ResourceUtilization = None,
+                running_gptq: bool = False,
                 tb_w: TensorboardWriter = None):
     """
     Quantize a trained model using post-training quantization.
@@ -97,7 +98,8 @@ def core_runner(in_model: Any,
                                      fw_impl,
                                      tpc,
                                      tb_w,
-                                     mixed_precision_enable=core_config.mixed_precision_enable)
+                                     mixed_precision_enable=core_config.mixed_precision_enable,
+                                     running_gptq=running_gptq)
     hessian_info_service = HessianInfoService(graph=graph,
                                               representative_dataset=representative_data_gen,
@@ -108,7 +110,8 @@ def core_runner(in_model: Any,
                                          core_config=core_config,
                                          fw_info=fw_info,
                                          fw_impl=fw_impl,
-                                         tb_w=tb_w)
+                                         tb_w=tb_w,
+                                         hessian_info_service=hessian_info_service)
     ######################################
     # Finalize bit widths

model_compression_toolkit/gptq/keras/quantization_facade.py CHANGED Viewed

@@ -212,7 +212,8 @@ if FOUND_TF:
                                                                   fw_impl=fw_impl,
                                                                   tpc=target_platform_capabilities,
                                                                   target_resource_utilization=target_resource_utilization,
-                                                                  tb_w=tb_w)
+                                                                  tb_w=tb_w,
+                                                                  running_gptq=True)
         float_graph = copy.deepcopy(tg)

model_compression_toolkit/gptq/pytorch/quantization_facade.py CHANGED Viewed

@@ -180,7 +180,9 @@ if FOUND_TORCH:
                                                                      fw_impl=fw_impl,
                                                                      tpc=target_platform_capabilities,
                                                                      target_resource_utilization=target_resource_utilization,
-                                                                     tb_w=tb_w)
+                                                                     tb_w=tb_w,
+                                                                     running_gptq=True)
         float_graph = copy.deepcopy(graph)
         # ---------------------- #

model_compression_toolkit/gptq/runner.py CHANGED Viewed

@@ -111,6 +111,7 @@ def gptq_runner(tg: Graph,
     #############################################
     # Gradient Based Post Training Quantization
     #############################################
+    Logger.info("Running GPTQ optimization.")
     tg_gptq = _apply_gptq(gptq_config,
                           gptq_representative_data_gen,
                           tb_w,

model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py CHANGED Viewed

@@ -13,12 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 from model_compression_toolkit.constants import FOUND_TF, FOUND_TORCH
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tp_model import get_tp_model, generate_tp_model, \
+from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model, generate_tp_model, \
     get_op_quantization_configs
 if FOUND_TF:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_keras import get_keras_tpc as get_keras_tpc_latest
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_keras import generate_keras_tpc
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import get_keras_tpc as get_keras_tpc_latest
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import generate_keras_tpc
 if FOUND_TORCH:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_pytorch import get_pytorch_tpc as \
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import get_pytorch_tpc as \
         get_pytorch_tpc_latest
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_pytorch import generate_pytorch_tpc
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import generate_pytorch_tpc

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240418.439.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240418.439.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240418.439.dist-info}/top_level.txt RENAMED Viewed

File without changes

mct-nightly 2.0.0.20240417.406__py3-none-any.whl → 2.0.0.20240418.439__py3-none-any.whl

mct-nightly 2.0.0.20240417.406py3-none-any.whl → 2.0.0.20240418.439py3-none-any.whl