PyPI - mct-nightly - Versions diffs - 2.0.0.20240408.430__py3-none-any.whl → 2.0.0.20240410.422__py3-none-any.whl - Mend

mct-nightly 2.0.0.20240408.430py3-none-any.whl → 2.0.0.20240410.422py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{mct_nightly-2.0.0.20240408.430.dist-info → mct_nightly-2.0.0.20240410.422.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.0.0.20240408.430
+Version: 2.0.0.20240410.422
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN

{mct_nightly-2.0.0.20240408.430.dist-info → mct_nightly-2.0.0.20240410.422.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-model_compression_toolkit/__init__.py,sha256=anO19rotJl4O84TFH2Q-g_vw5me9I3_WDoj1NZgiLbo,1573
+model_compression_toolkit/__init__.py,sha256=c33LV9Kt6hpVEoLixt_I5rqhtSzRBPSrdmFEifg-VHU,1573
 model_compression_toolkit/constants.py,sha256=KW_HUEPmQEYqCvWGyORqkYxpvO7w5LViB5J5D-pm_6o,3648
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
 model_compression_toolkit/core/__init__.py,sha256=TrRgkWpT1AN2Faw1M_1HXyJkJnbxfn9p-RigDZl7pg0,1982
-model_compression_toolkit/core/analyzer.py,sha256=dbsD61pakp_9JXNyAScLdtJvcXny9jr_cMbET0Bd3Sg,2975
+model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
 model_compression_toolkit/core/graph_prep_runner.py,sha256=Ftqm59hT5TGWmSNkY9bFZkVfCacpGyZfCe-6yZR5WY0,10100
 model_compression_toolkit/core/quantization_prep_runner.py,sha256=hFhDkS8GwzXZ7Ho_9qbbb8DAAWs3OONOfMSD5OU_b0o,6153
 model_compression_toolkit/core/runner.py,sha256=NKSC6ujfQPy6dKtJVwxyK2zNDd64eyR5csYy9lBrCPA,11836
@@ -16,7 +16,7 @@ model_compression_toolkit/core/common/model_builder_mode.py,sha256=jll9-59OPaE3u
 model_compression_toolkit/core/common/model_collector.py,sha256=ofcepKtxc3j2Ouz6BpAKXTzPgjABnpRP47ndmJCXAkk,8352
 model_compression_toolkit/core/common/model_validation.py,sha256=LaG8wd6aZl0OJgieE3SeiVDEPxtk8IHq9-3wSnmWhY4,1214
 model_compression_toolkit/core/common/node_prior_info.py,sha256=WXX_PrGVG9M9I_REG5ZzFBohwmV4yf356sZnrja_FLo,2832
-model_compression_toolkit/core/common/similarity_analyzer.py,sha256=2w-q7guEb5bpLY4Vk_TMjR8TzLYEymR3tPFlrVq7K68,8515
+model_compression_toolkit/core/common/similarity_analyzer.py,sha256=98l9ttnXHf6VYxBW4852h2CPJKg3A6nLOovpHn-tnKs,8560
 model_compression_toolkit/core/common/user_info.py,sha256=dSRMnT-oewmdOziIpEuW-s9K7vTSeyUBxT4z9neXurI,1648
 model_compression_toolkit/core/common/back2framework/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/common/back2framework/base_model_builder.py,sha256=V1oShKzbSkdcTvREn8VnQQBzvm-tTHkWMXqMkYozF2s,2023
@@ -100,11 +100,11 @@ model_compression_toolkit/core/common/quantization/candidate_node_quantization_c
 model_compression_toolkit/core/common/quantization/core_config.py,sha256=KYdyfSmjSL4ye24nKlC_c4_AxYb14qoqaeMnZj4-8kE,2257
 model_compression_toolkit/core/common/quantization/debug_config.py,sha256=HtkMmneN-EmAzgZK4Vp4M8Sqm5QKdrvNyyZMpaVqYzY,1482
 model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=fwF4VILaX-u3ZaFd81xjbJuhg8Ef-JX_KfMXW0TPV-I,7136
-model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=lXONxIOSvYMgkN9M1st4tV1V5JSpijUGxF0hZWRvtUI,26737
-model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=hQMKm55EXS1oV-Upt6IQtsYhpuhMvYeWRJhh6lhv_Ko,6699
+model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=TCgpvtfyzFUedv4sZ6sKzsTyikaVl2ixLj_aHPSC2r0,27014
+model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=BieZDv9oc-Mc78S_LRMGo-s_2acbqiLE0ewaSE1v2VY,6818
 model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=T1nVWdRJfBQ_iuMQYQSIkjfkR-2n3lAOKGAz_rUZZN0,2190
 model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=MwIOBZ4BlZSTIOG75PDvlI3JmZ6t8YjPc1VP9Adei60,3847
-model_compression_toolkit/core/common/quantization/quantize_graph_weights.py,sha256=xnM9O9LshYw3dprqfsnK9mw7ipOEAkI85o20auyfswg,2626
+model_compression_toolkit/core/common/quantization/quantize_graph_weights.py,sha256=N005MSvx8UypVpa7XrxNrB2G732n2wHj3RmLyjTgd3I,2728
 model_compression_toolkit/core/common/quantization/quantize_node.py,sha256=cdzGNWfT4MRogIU8ehs0tr3lVjnzAI-jeoS9b4TwVBo,2854
 model_compression_toolkit/core/common/quantization/set_node_quantization_config.py,sha256=9BEv2l0z2trDEsr40VB8tO3ToBA_b2sd_jH9uqZ5Wo8,11503
 model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py,sha256=eCDGwsWYLU6z7qbEVb4TozMW_nd5VEP_iCJ6PcvyEPw,1486
@@ -142,13 +142,13 @@ model_compression_toolkit/core/common/substitutions/virtual_activation_weights_c
 model_compression_toolkit/core/common/substitutions/weights_activation_split.py,sha256=h85L2VlDOqbLd-N98wA3SdYWiblBgSsPceNuLanJd70,4737
 model_compression_toolkit/core/common/visualization/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/common/visualization/final_config_visualizer.py,sha256=6I10jKLesB-RQKaXA75Xgz2wPvylQUrnPtCcQZIynGo,6371
-model_compression_toolkit/core/common/visualization/nn_visualizer.py,sha256=jjsOng-fLxeQFQNshIsOu_w1d5a3fJ359Hcnt85Te-o,5921
+model_compression_toolkit/core/common/visualization/nn_visualizer.py,sha256=HOq7AObkmEZiDSZXUMJDAEJzUY-fSXUT0AMgwiyH7dg,7388
 model_compression_toolkit/core/common/visualization/tensorboard_writer.py,sha256=4E4ZXZmqusGIJ4XQNH8FFt07htAHgT3gy5E7wPIaVBI,21951
 model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/constants.py,sha256=Uv3c0UdW55pIVQNW_1HQlgl-dHXREkltOLyzp8G1mTQ,3163
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=Ha4HTHuiw_KTS5Po1Xnv6GyK9eprpDhYWf-eooS62Ys,4961
-model_compression_toolkit/core/keras/keras_implementation.py,sha256=NDHLl19I-xQrQGcsAwTcFjnIjCRn31xaPrqDYm8g_dg,29027
+model_compression_toolkit/core/keras/keras_implementation.py,sha256=RS2UEtZ_anZeDxz7Zv6sNv7v9tFVct6d9KVrUlxTGpo,29309
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
 model_compression_toolkit/core/keras/keras_node_prior_info.py,sha256=Aqh31wOPaiZcJIOm-uJwzev0eTMdJyXaOk97rs4z7BU,3879
 model_compression_toolkit/core/keras/resource_utilization_data_facade.py,sha256=Xmk2ZL5CaYdb7iG62HdtZ1F64vap7ffnrsuR3e3G5hc,4851
@@ -166,6 +166,7 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/activatio
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py,sha256=9YCNPiK5BD7tLs1meabPhzfb2VsyPxrZM17zMFsW_Fo,8158
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_reconstruction.py,sha256=GR1a3mCZpNUu4WxixJXF_aSm57phAdxaRoHecNx3hxw,3168
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_refusing.py,sha256=5df_xGfXkqNub4xVRnCWQvSohWqdv12axjJ6edVU2H0,2478
+model_compression_toolkit/core/keras/graph_substitutions/substitutions/concat_threshold_update.py,sha256=Hl4LEQ_bw_Vpmf3ZqHujYUqVdvTNsPlEMvr9dZhwg2U,2806
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py,sha256=R3U7cjc2E0zheMem16GHygp5jZFGSaomkNOTxTjcAgw,5794
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py,sha256=V6hp67CkS_A3WqdsjLjs0ETtdZAOo4P9mhy4aT7W5FE,5940
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py,sha256=dyhqZrxSTclXyarT2JYnI5WPX0OvWR_CQiwddIr632U,8143
@@ -209,7 +210,7 @@ model_compression_toolkit/core/pytorch/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKW
 model_compression_toolkit/core/pytorch/constants.py,sha256=NI-J7REuxn06oEIHsmJ4GqtNC3TbV8xlkJjt5Ar-c4U,2626
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=r1XyzUFvrjGcJHQM5ETLsMZIG2yHCr9HMjqf0ti9inw,4175
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=IoMvTch5awAEPvB6Tg6ANhFGXvfSgv7JLsUBlxpMwk4,4330
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=dakO4Nj-tFfs53y6dJyXbpoljx2n3ZqmMoB4CFWGNSQ,26868
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=mT4jd8E1saCpAgrsClufQbnVJ0eYn1xaTQ3teALu4jk,27117
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=n_B4a6FMwM9D2w8kzy3oenBWZgXNZuIZgTJC6JEuTy0,3250
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=E6ifk1HdO60k4IRH2EFBzAYWtwUlrGqJoQ66nknpHoQ,4983
 model_compression_toolkit/core/pytorch/utils.py,sha256=dRPiteBg2dBNsHwZyYzXiCIAjnelSoeZZsDXlsTw5JQ,2880
@@ -228,6 +229,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/__init_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_folding.py,sha256=j3q5DzbH3ys5MPFfSOVnAXdD7-g4XEKj2ADrdihVr30,8292
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_reconstruction.py,sha256=B7aC2TZNrQJ2oQVGBFhKAVqdUU5lYVJSMmwKhjxOHWk,2822
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_refusing.py,sha256=JDWOaNwYrZG0zTwd3HwoZUM3tKu7zPbzLOrqNQsu8xA,2162
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/concat_threshold_update.py,sha256=SBrR24ZAnWPftLinv4FuIqdBGjfYtfXbYQJN5mgy5V4,2861
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py,sha256=dYGyb5ebnoeFBF0EaHPQU7CkXvoARdznEEe0laM47LA,3919
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py,sha256=iX8bLHtw2osP42-peNLTRmbpX3cUxdGsAbEfw7NLpx0,3935
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_layer_norm.py,sha256=zKSgtVw_P9fUvdq4e7P9yaLDPG_vZ0cecM9sVPtm1ns,3799
@@ -336,9 +338,9 @@ model_compression_toolkit/gptq/common/gptq_training.py,sha256=rLA1xlOO-6gWfmc2dL
 model_compression_toolkit/gptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCSjq5xk-xGymOwSOqjp39It-CVtGcCTRTf0E_4,1248
 model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=rbRkF15MYd6nq4G49kcjb_dPTa-XNq9cTkrb93mXawo,6241
-model_compression_toolkit/gptq/keras/gptq_training.py,sha256=6TseqtBzZkLqyc3hiRVdA1dv01us6Y_Su05CBboGjjc,18438
+model_compression_toolkit/gptq/keras/gptq_training.py,sha256=OhYfH6zxRHrRhCde0lbcV9Hu2oeDD9RXh-O8vOPgLbs,18875
 model_compression_toolkit/gptq/keras/graph_info.py,sha256=5IvgGlJlgOmQYmldjdCBv7tuzAoY0HazatG5Pedrg0Q,4639
-model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=yRgiikyeELar4jlsdcf5pO9HQcxiyhKiAXY3lsMixew,13913
+model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=zAkzWpWP9_aobWgMo_BlUm7-4fR5dHvoGx0sDqs2rZg,14299
 model_compression_toolkit/gptq/keras/quantizer/__init__.py,sha256=-DK1CDXvlsnEbki4lukZLpl6Xrbo91_jcqxXlG5Eg6Q,963
 model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py,sha256=2YU-x4-Q5f6hkUJf0tw6vcwdNwRMHdefrFjhhyHYsvA,4782
 model_compression_toolkit/gptq/keras/quantizer/quant_utils.py,sha256=Vt7Qb8i4JsE4sFtcjpfM4FTXTtfV1t6SwfoNH8a_Iaw,5055
@@ -353,9 +355,9 @@ model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py,sha
 model_compression_toolkit/gptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=kDuWw-6zh17wZpYWh4Xa94rpoodf82DksgjQCnL7nBc,2719
 model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
-model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=ksFXtepAsk-66xk7OPciG05kU9sgAUrWqjOgplsGSnw,15808
+model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=LN4vOwcMuSSFTSnHDACV9hX_Yd2YIXJRl7WkdODuA0k,16245
 model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=yXJzDd24zfGs2_vfMovxD1WSh1RxXoPxN4GztOf3P5c,3967
-model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=bImrTw9rrAVc3VD4nmrXmBo_K4fuf5m5XPPf8ybOThs,12430
+model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=-4USg-tep6EQSArcTxBowhMeAuExrBTNLOWgHFpsIy4,12699
 model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
 model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=TCA1hAc7raPnrjl06sjFtVM4XUtLtuwAhCGX4U3KGZo,4137
 model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
@@ -375,9 +377,9 @@ model_compression_toolkit/pruning/pytorch/pruning_facade.py,sha256=cSuvHHCqgr7k9
 model_compression_toolkit/ptq/__init__.py,sha256=Z_hkmTh7aLFei1DJKV0oNVUbrv_Q_0CTw-qD85Xf8UM,904
 model_compression_toolkit/ptq/runner.py,sha256=_c1dSjlPPpsx59Vbg1buhG9bZq__OORz1VlPkwjJzoc,2552
 model_compression_toolkit/ptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/ptq/keras/quantization_facade.py,sha256=5Doa5Rwer84DRgJxLa2e6aX9B4yGYdmFGgiv71_wD9o,8992
+model_compression_toolkit/ptq/keras/quantization_facade.py,sha256=T1_UqXmOc4I2a6IHkQAlFhGtcAYjsXSApMIdRlvgDvg,10154
 model_compression_toolkit/ptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/ptq/pytorch/quantization_facade.py,sha256=QW7lMRuuonupLPZ2w2PDIQd7qpDZ_euLInhskTc1Yes,7518
+model_compression_toolkit/ptq/pytorch/quantization_facade.py,sha256=eof9bo-Mv_lLY7fFpiVeT5pIde-MuTWkIAqRKH4j9MI,8646
 model_compression_toolkit/qat/__init__.py,sha256=kj2qsZh_Ca7PncsHKcaL5EVT2H8g4hYtvaQ3KFxOkwE,1143
 model_compression_toolkit/qat/common/__init__.py,sha256=6tLZ4R4pYP6QVztLVQC_jik2nES3l4uhML0qUxZrezk,829
 model_compression_toolkit/qat/common/qat_config.py,sha256=zoq0Vb74vCY7WlWD8JH_KPrHDoUHSvMc3gcO53u7L2U,3394
@@ -469,8 +471,8 @@ model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha
 model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
 model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
 model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=7bbzqJN8ZAycVDvZr_5xC-niTAR5df8f03Kooev_pfg,3047
-mct_nightly-2.0.0.20240408.430.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-mct_nightly-2.0.0.20240408.430.dist-info/METADATA,sha256=5s1f6HnTLamnYz4r2orEDjLgg0h-L-v3dcQkXT9K-c4,18795
-mct_nightly-2.0.0.20240408.430.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-mct_nightly-2.0.0.20240408.430.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.0.0.20240408.430.dist-info/RECORD,,
+mct_nightly-2.0.0.20240410.422.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.0.0.20240410.422.dist-info/METADATA,sha256=Xx2HTbZkpp4O8bS07IXSnaYSh9ZZTxe61I47ovv9fzE,18795
+mct_nightly-2.0.0.20240410.422.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+mct_nightly-2.0.0.20240410.422.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.0.0.20240410.422.dist-info/RECORD,,

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.0.0.20240408.000430"
+__version__ = "2.0.0.20240410.000422"

model_compression_toolkit/core/analyzer.py CHANGED Viewed

@@ -30,7 +30,8 @@ from model_compression_toolkit.logger import Logger
 def analyzer_model_quantization(representative_data_gen: Callable,
                                 tb_w: TensorboardWriter,
-                                tg: Graph,
+                                float_graph: Graph,
+                                quantized_graph: Graph,
                                 fw_impl: FrameworkImplementation,
                                 fw_info: FrameworkInfo):
     """
@@ -41,23 +42,32 @@ def analyzer_model_quantization(representative_data_gen: Callable,
     Args:
         representative_data_gen: Dataset used for calibration.
         tb_w: TensorBoardWriter object to log events.
-        tg: Graph of quantized model.
+        float_graph: Graph of float model.
+        quantized_graph: Graph of quantized model.
         fw_impl: FrameworkImplementation object with a specific framework methods implementation.
         fw_info: Information needed for quantization about the specific framework.
     """
     if tb_w is not None:
-        visual = NNVisualizer(tg,
+        visual = NNVisualizer(float_graph,
+                              quantized_graph,
                               fw_impl=fw_impl,
                               fw_info=fw_info)
-        for i, _data in enumerate(representative_data_gen()):
-            if i >= NUM_SAMPLES_DISTANCE_TENSORBOARD:
-                break
-            figure = visual.plot_distance_graph(_data,
-                                                distance_fn=compute_cs,
-                                                convert_to_range=lambda a: 1 - 2 * a)
-            tb_w.add_figure(figure, f'similarity_distance_sample_{i}')
+        if not visual.has_compare_points():
+            Logger.error(f'No comparing points were found to plot analyze similarity.')
         else:
-            Logger.warning(f'Not enough batches in representative dataset to generate {NUM_SAMPLES_DISTANCE_TENSORBOARD} figures')
+            visualized_samples = 0
+            for _data in representative_data_gen():
+                batch_size = _data[0].shape[0]
+                for sample_index in range(batch_size):
+                    if visualized_samples >= NUM_SAMPLES_DISTANCE_TENSORBOARD:
+                        break
+                    figure = visual.plot_distance_graph(_data,
+                                                        sample_index=sample_index,
+                                                        distance_fn=compute_cs,
+                                                        convert_to_range=lambda a: 1 - 2 * a)
+                    tb_w.add_figure(figure, f'similarity_distance_sample_{visualized_samples}')
+                    visualized_samples += 1
+            if visualized_samples < NUM_SAMPLES_DISTANCE_TENSORBOARD:
+                Logger.error(f'Not enough batches in representative dataset to generate {NUM_SAMPLES_DISTANCE_TENSORBOARD} figures')
         tb_w.close()

model_compression_toolkit/core/common/quantization/node_quantization_config.py CHANGED Viewed

@@ -41,24 +41,24 @@ class BaseNodeQuantizationConfig(object):
     Base class for node quantization configuration
     """
-    def set_quant_config_attr(self, parameter_name: str, parameter_value: Any,
+    def set_quant_config_attr(self, config_parameter_name: str, config_parameter_value: Any,
                               *args: List[Any], **kwargs: Dict[str, Any]):
         """
         Changes a BaseNodeQuantizationConfig's parameter.
         Note that arg and kwargs are only to allow clean override in the child classes.
         Args:
-            parameter_name: parameter name to change.
-            parameter_value: parameter value to change.
+            config_parameter_name: parameter name to change.
+            config_parameter_value: parameter value to change.
             args: A list of additional arguments.
             kwargs: A dictionary with additional key arguments.
         """
-        if hasattr(self, parameter_name):
-            setattr(self, parameter_name, parameter_value)
+        if hasattr(self, config_parameter_name):
+            setattr(self, config_parameter_name, config_parameter_value)
         else:
-            Logger.warning(f"Parameter {parameter_name} could not be found in the node quantization config and "
+            Logger.warning(f"Parameter {config_parameter_name} could not be found in the node quantization config and "
                            f"was not updated!")
     def __repr__(self) -> str:
@@ -106,6 +106,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
         self.z_threshold = qc.z_threshold
         self.shift_negative_ratio = qc.shift_negative_ratio
         self.shift_negative_threshold_recalculation = qc.shift_negative_threshold_recalculation
+        self.concat_threshold_update = qc.concat_threshold_update
     def quantize_node_output(self,
                              tensors: Any) -> Any:
@@ -219,7 +220,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
                self.shift_negative_activation_correction == other.shift_negative_activation_correction and \
                self.z_threshold == other.z_threshold and \
                self.shift_negative_ratio == other.shift_negative_ratio and \
-               self.shift_negative_threshold_recalculation == other.shift_negative_threshold_recalculation
+               self.shift_negative_threshold_recalculation == other.shift_negative_threshold_recalculation
     def __hash__(self):
         return hash((self.activation_quantization_fn,
@@ -521,7 +522,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                            f"{list(attrs_with_name.keys())}.")
         return attrs_with_name
-    def set_quant_config_attr(self, parameter_name: str, parameter_value: Any, attr_name: str = None,
+    def set_quant_config_attr(self, config_parameter_name: str, config_parameter_value: Any, attr_name: str = None,
                               *args: List[Any], **kwargs: Dict[str, Any]):
         """
         This method overrides the parent class set_quant_config_attr to enable setting a specific weights
@@ -529,26 +530,27 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         Args:
             attr_name: attribute name to change.
-            parameter_name: parameter name to change.
-            parameter_value: parameter value to change.
+            config_parameter_name: parameter name to change.
+            config_parameter_value: parameter value to change.
             args: A list of additional arguments.
             kwargs: A dictionary with additional key arguments.
         """
         if attr_name is None:
-            super(NodeWeightsQuantizationConfig, self).set_quant_config_attr(parameter_name, parameter_value,
+            super(NodeWeightsQuantizationConfig, self).set_quant_config_attr(config_parameter_name,
+                                                                             config_parameter_value,
                                                                              *args, **kwargs)
         else:
             if self.has_attribute_config(attr_name):
                 attr_cfg = self.get_attr_config(attr_name)
-                if hasattr(attr_cfg, parameter_name):
-                    setattr(attr_cfg, parameter_name, parameter_value)
+                if hasattr(attr_cfg, config_parameter_name):
+                    setattr(attr_cfg, config_parameter_name, config_parameter_value)
                 else:
-                    Logger.warning(f"Parameter {parameter_name} could not be found in the node quantization config of "
+                    Logger.warning(f"Parameter {config_parameter_name} could not be found in the node quantization config of "
                                    f"weights attribute {attr_name} and was not updated!")
             else:
-                Logger.error(f"Weights attribute {attr_name} could not be found to set parameter {parameter_name}.")
+                Logger.error(f"Weights attribute {attr_name} could not be found to set parameter {config_parameter_name}.")
     def __eq__(self, other: Any) -> bool:
         """

model_compression_toolkit/core/common/quantization/quantization_config.py CHANGED Viewed

@@ -62,7 +62,8 @@ class QuantizationConfig:
                  residual_collapsing: bool = True,
                  shift_negative_ratio: float = 0.05,
                  shift_negative_threshold_recalculation: bool = False,
-                 shift_negative_params_search: bool = False):
+                 shift_negative_params_search: bool = False,
+                 concat_threshold_update: bool = False):
         """
         Class to wrap all different parameters the library quantize the input model according to.
@@ -117,6 +118,7 @@ class QuantizationConfig:
         self.shift_negative_ratio = shift_negative_ratio
         self.shift_negative_threshold_recalculation = shift_negative_threshold_recalculation
         self.shift_negative_params_search = shift_negative_params_search
+        self.concat_threshold_update = concat_threshold_update
     def __repr__(self):
         return str(self.__dict__)

model_compression_toolkit/core/common/quantization/quantize_graph_weights.py CHANGED Viewed

@@ -23,7 +23,7 @@ from model_compression_toolkit.core.common.quantization.quantize_node import get
 from model_compression_toolkit.logger import Logger
-def quantize_graph_weights(graph: Graph) -> Graph:
+def quantize_graph_weights(graph_to_quantize: Graph) -> Graph:
     """
     Get a graph representing a model, and quantize its nodes' weights.
     Each node is quantized according to the passed framework info and quantization configuration.
@@ -31,12 +31,13 @@ def quantize_graph_weights(graph: Graph) -> Graph:
     is calculated and subtracted from the original node's bias. The graph is quantized in-place.
     Args:
-        graph: Graph to quantize its nodes.
+        graph_to_quantize: Graph to quantize its nodes.
     """
+    _quantized_graph = copy.deepcopy(graph_to_quantize)
     # Iterate over nodes in the graph and quantize each node's weights and activations
     # (according to operators groups in framework info).
-    for n in graph.nodes():
+    for n in _quantized_graph.nodes():
         for attr in n.get_node_weights_attributes():
             if n.is_weights_quantization_enabled(attr):
                 quantized_attr, io_channels_axes = \
@@ -51,4 +52,4 @@ def quantize_graph_weights(graph: Graph) -> Graph:
                 # Set the attribute to be the quantized attribute.
                 n.set_weights_by_keys(attr, quantized_attr)
-    return graph
+    return _quantized_graph

model_compression_toolkit/core/common/similarity_analyzer.py CHANGED Viewed

@@ -146,7 +146,10 @@ def compute_mae(float_tensor: np.ndarray,
     return error
-def compute_cs(float_tensor: np.ndarray, fxp_tensor: np.ndarray, eps: float = 1e-8, batch: bool = False,
+def compute_cs(float_tensor: np.ndarray,
+               fxp_tensor: np.ndarray,
+               eps: float = 1e-8,
+               batch: bool = False,
                axis: int = None) -> float:
     """
     Compute the similarity between two tensor using cosine similarity.

model_compression_toolkit/core/common/visualization/nn_visualizer.py CHANGED Viewed

@@ -12,19 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
 from typing import Tuple, List, Callable
 import numpy as np
 from matplotlib import pyplot as plt
 from matplotlib.figure import Figure
-from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
 from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.graph.base_node import BaseNode
 from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
 from model_compression_toolkit.core.common.similarity_analyzer import compute_cs
+from model_compression_toolkit.logger import Logger
 def _get_compare_points(input_graph: Graph) -> Tuple[List[BaseNode], List[str]]:
@@ -57,17 +58,21 @@ class NNVisualizer:
     def __init__(self,
                  graph_float: Graph,
+                 graph_quantized: Graph,
                  fw_impl: FrameworkImplementation,
                  fw_info: FrameworkInfo):
         """
         Initialize a NNVisualizer object.
         Args:
             graph_float: Float version of the graph.
+            graph_quantized: Quantized version of the graph.
+            fw_impl: Framework implementation with framework-specific methods implementation.
+            fw_info: Framework info with framework-specific information.
         """
         self.graph_float = graph_float
-        self.graph_quantized = quantize_graph_weights(graph_float)
+        self.graph_quantized = graph_quantized
         self.fw_impl = fw_impl
         self.fw_info = fw_info
@@ -75,6 +80,16 @@ class NNVisualizer:
         self.compare_points, self.compare_points_name = _get_compare_points(self.graph_quantized)
         self.compare_points_float, self.compare_points_name_float = _get_compare_points(self.graph_float)
+        if len(self.compare_points) != len(self.compare_points_float):
+            Logger.critical(f"Number of compare points in float and quantized models must be equal but "
+                            f"num of quantized compare points: {len(self.compare_points)} and "
+                            f"num of float compare points: {len(self.compare_points_float)}")
+        if len(self.compare_points_name) != len(self.compare_points_name_float):
+            Logger.critical(f"Number of compare points in float and quantized models must be equal "
+                            f"but num of quantized compare points: {len(self.compare_points_name)}"
+                            f" and num of float compare points: "
+                            f"{len(self.compare_points_name_float)}")
         self.quantized_model, _ = self.fw_impl.model_builder(self.graph_quantized,
                                                              mode=ModelBuilderMode.QUANTIZED,
                                                              append2output=self.compare_points,
@@ -85,8 +100,19 @@ class NNVisualizer:
                                                          append2output=self.compare_points_float,
                                                          fw_info=self.fw_info)
+    def has_compare_points(self) -> bool:
+        """
+        Returns: Whether or not compare points were found.
+        """
+        return len(self.compare_points_float) > 0 and len(self.compare_points) > 0 and len(
+            self.compare_points_name_float) > 0 and len(self.compare_points_name) > 0
     def plot_distance_graph(self,
                             input_image: np.ndarray,
+                            sample_index: int,
                             distance_fn: Callable = compute_cs,
                             convert_to_range: Callable = lambda a: a) -> Figure:
         """
@@ -95,6 +121,7 @@ class NNVisualizer:
         Args:
             input_image: Image to use as input to the networks.
+            sample_index: The index of the sample from input_image to use for comparison.
             distance_fn: Distance function to calculate the distance between two tensors.
             convert_to_range: Optional function to move the distance values into a specific range, e.g., when using
                 cosine similarity for distance, use 'lambda a: 1 - 2 * a' to convert the distance values to the range
@@ -108,7 +135,7 @@ class NNVisualizer:
         # to make the difference more noticeable when exists
         new_inputs = []
         for single_input in input_image:
-            img = single_input[0]
+            img = single_input[sample_index]
             new_inputs.append(np.expand_dims(img, axis=0))
         # Get outputs
@@ -123,7 +150,7 @@ class NNVisualizer:
         # Display the result: distance at every layer's output.
         fig = plt.figure()
-        plt.plot(distance_array)
+        plt.plot(list(range(len(distance_array))), distance_array)
         eps = 0.5
         y_limits = (min(distance_array) - eps, max(distance_array) + eps)
         plt.ylim(y_limits)

model_compression_toolkit/core/keras/graph_substitutions/substitutions/concat_threshold_update.py ADDED Viewed

@@ -0,0 +1,66 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from tensorflow.keras.layers import Concatenate
+import tensorflow as tf
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.constants import THRESHOLD
+class ConcatThresholdUpdate(common.BaseSubstitution):
+    """
+    Find concat layers and match their prior layers thresholds unless prior layer outputs to multiple layers.
+    """
+    def __init__(self):
+        """
+        Initialize a threshold_updater object.
+        """
+        concatination_node = NodeOperationMatcher(Concatenate) | \
+            NodeOperationMatcher(tf.concat)
+        super().__init__(matcher_instance=concatination_node)
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        Update previous layers thresholds to match concatinations quantization thresholds. No change if
+        previous layer outputs to multiple layers. No change in case of uniform quantization.
+        No change in case of multiple quantization candidates (mixed precision).
+        Args:
+            graph: Graph we apply the substitution on.
+            node: Node refference to edit previous nodes thresholds.
+        Returns:
+            Graph after applying the substitution.
+        """
+        if len(node.candidates_quantization_cfg) == 1 and THRESHOLD in node.candidates_quantization_cfg[0].activation_quantization_cfg.activation_quantization_params:
+            concat_threshold = node.candidates_quantization_cfg[0].activation_quantization_cfg.activation_quantization_params[THRESHOLD]
+            prev_nodes = graph.get_prev_nodes(node)
+            for prev_node in prev_nodes:
+                if len(graph.get_next_nodes(prev_node))==1 and prev_node.type != Concatenate and prev_node.type != tf.concat:
+                    prev_node.candidates_quantization_cfg[0].activation_quantization_cfg.activation_quantization_params[THRESHOLD] = concat_threshold
+        return graph

model_compression_toolkit/core/keras/keras_implementation.py CHANGED Viewed

@@ -80,7 +80,8 @@ from model_compression_toolkit.core.keras.graph_substitutions.substitutions.line
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.residual_collapsing import \
     keras_residual_collapsing
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.input_scaling import InputScaling, \
-    InputScalingWithPad
+    InputScalingWithPad
+from model_compression_toolkit.core.keras.graph_substitutions.substitutions.concat_threshold_update import ConcatThresholdUpdate
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.relu_bound_to_power_of_2 import \
     ReLUBoundToPowerOfTwo
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.multi_head_attention_decomposition import \
@@ -300,8 +301,8 @@ class KerasImplementation(FrameworkImplementation):
         """
         return keras_op2d_add_const_collapsing()
-    def get_substitutions_post_statistics_collection(self, quant_config: QuantizationConfig) \
-            -> List[common.BaseSubstitution]:
+    def get_substitutions_post_statistics_collection(self,
+                                                     quant_config: QuantizationConfig) -> List[common.BaseSubstitution]:
         """
         Return a list of the framework substitutions used after we collect statistics.
@@ -317,6 +318,8 @@ class KerasImplementation(FrameworkImplementation):
         if quant_config.input_scaling:
             substitutions_list.append(InputScaling())
             substitutions_list.append(InputScalingWithPad())
+        if quant_config.concat_threshold_update:
+            substitutions_list.append(ConcatThresholdUpdate())
         return substitutions_list

model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/concat_threshold_update.py ADDED Viewed

@@ -0,0 +1,69 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import List
+import torch
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+from model_compression_toolkit.constants import THRESHOLD
+class ConcatThresholdUpdate(common.BaseSubstitution):
+    """
+    Find concat layers and match their prior layers thresholds unless prior layer outputs to multiple layers.
+    """
+    def __init__(self):
+        """
+        Initialize a threshold_updater object.
+        """
+        concatination_node = NodeOperationMatcher(torch.cat) | \
+            NodeOperationMatcher(torch.concat)
+        super().__init__(matcher_instance=concatination_node)
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        Update previous layers thresholds to match concatinations quantization thresholds. No change if
+        previous layer outputs to multiple layers. No change in case of uniform quantization.
+        No change in case of multiple quantization candidates (mixed precision).
+        Args:
+            graph: Graph we apply the substitution on.
+            node: Node refference to edit previous nodes thresholds.
+        Returns:
+            Graph after applying the substitution.
+        """
+        if len(node.candidates_quantization_cfg) == 1 and THRESHOLD in node.candidates_quantization_cfg[0].activation_quantization_cfg.activation_quantization_params:
+            concat_threshold = node.candidates_quantization_cfg[0].activation_quantization_cfg.activation_quantization_params[THRESHOLD]
+            prev_nodes = graph.get_prev_nodes(node)
+            for prev_node in prev_nodes:
+                if len(graph.get_next_nodes(prev_node))==1 and prev_node.type != torch.cat and prev_node.type != torch.concat:
+                    prev_node.candidates_quantization_cfg[0].activation_quantization_cfg.activation_quantization_params[THRESHOLD] = concat_threshold
+        return graph

model_compression_toolkit/core/pytorch/pytorch_implementation.py CHANGED Viewed

@@ -73,6 +73,8 @@ from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.vi
     VirtualActivationWeightsComposition
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.weights_activation_split import \
     WeightsActivationSplit
+from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.concat_threshold_update import \
+    ConcatThresholdUpdate
 from model_compression_toolkit.core.pytorch.hessian.activation_trace_hessian_calculator_pytorch import \
     ActivationTraceHessianCalculatorPytorch
 from model_compression_toolkit.core.pytorch.hessian.weights_trace_hessian_calculator_pytorch import \
@@ -302,6 +304,8 @@ class PytorchImplementation(FrameworkImplementation):
             substitutions_list.append(pytorch_softmax_shift())
         if quant_config.input_scaling:
             Logger.critical('Input scaling is currently not supported for Pytorch.')
+        if quant_config.concat_threshold_update:
+            substitutions_list.append(ConcatThresholdUpdate())
         return substitutions_list

model_compression_toolkit/gptq/keras/gptq_training.py CHANGED Viewed

@@ -337,12 +337,16 @@ class KerasGPTQTrainer(GPTQTrainer):
                 node = node[0]
                 kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
                                                                       fw_info=self.fw_info)
+                # TODO: only kernel attributes are currently trained in GPTQ, so only the kernel weights need to be updated.
+                #  To enable GPTQ for other attributes, this code needs to be modified.
                 weights, weight_quant_config, activation_quant_config = \
                     layer.weights_quantizers[kernel_attribute].update_layer_quantization_params(layer)
                 for weight_attr, weight in weights.items():
                     node.set_weights_by_keys(weight_attr, weight.numpy())
-                for config_attr, config_value in weight_quant_config.items():
-                    node.final_weights_quantization_cfg.set_quant_config_attr(config_attr, config_value)
+                for config_parameter_name, config_parameter_value in weight_quant_config.items():
+                    node.final_weights_quantization_cfg.set_quant_config_attr(config_parameter_name,
+                                                                              config_parameter_value,
+                                                                              attr_name=kernel_attribute)
                 for config_attr, config_value in activation_quant_config.items():
                     node.final_activation_quantization_cfg.set_quant_config_attr(config_attr, config_value)
                 if self.gptq_config.train_bias:

model_compression_toolkit/gptq/keras/quantization_facade.py CHANGED Viewed

@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 from typing import Callable, Tuple
 from packaging import version
+from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
 from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
 from model_compression_toolkit.logger import Logger
@@ -210,6 +212,8 @@ if FOUND_TF:
                                                                   target_resource_utilization=target_resource_utilization,
                                                                   tb_w=tb_w)
+        float_graph = copy.deepcopy(tg)
         tg_gptq = gptq_runner(tg,
                               core_config,
                               gptq_config,
@@ -223,7 +227,12 @@ if FOUND_TF:
         del hessian_info_service
         if core_config.debug_config.analyze_similarity:
-            analyzer_model_quantization(representative_data_gen, tb_w, tg_gptq, fw_impl, fw_info)
+            analyzer_model_quantization(representative_data_gen,
+                                        tb_w,
+                                        float_graph,
+                                        tg_gptq,
+                                        fw_impl,
+                                        DEFAULT_KERAS_INFO)
         return get_exportable_keras_model(tg_gptq)

model_compression_toolkit/gptq/pytorch/gptq_training.py CHANGED Viewed

@@ -284,12 +284,16 @@ class PytorchGPTQTrainer(GPTQTrainer):
                 node = node[0]
                 kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=node.type,
                                                                       fw_info=self.fw_info)
+                # TODO: only kernel attributes are currently trained in GPTQ, so only the kernel weights need to be updated.
+                #  To enable GPTQ for other attributes, this code needs to be modified.
                 weights, weight_quant_config, activation_quant_config = \
                     layer.weights_quantizers[kernel_attribute].update_layer_quantization_params(layer)
                 for weight_attr, weight in weights.items():
                     node.set_weights_by_keys(weight_attr, self.fw_impl.to_numpy(weight))
-                for config_attr, config_value in weight_quant_config.items():
-                    node.final_weights_quantization_cfg.set_quant_config_attr(config_attr, config_value)
+                for config_parameter_name, config_parameter_value in weight_quant_config.items():
+                    node.final_weights_quantization_cfg.set_quant_config_attr(config_parameter_name,
+                                                                              config_parameter_value,
+                                                                              attr_name=kernel_attribute)
                 for config_attr, config_value in activation_quant_config.items():
                     node.final_activation_quantization_cfg.set_quant_config_attr(config_attr, config_value)
                 if self.gptq_config.train_bias and hasattr(layer.layer, BIAS):

model_compression_toolkit/gptq/pytorch/quantization_facade.py CHANGED Viewed

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 from typing import Callable
 from model_compression_toolkit.core import common
 from model_compression_toolkit.constants import FOUND_TORCH
@@ -177,6 +179,7 @@ if FOUND_TORCH:
                                                                      tpc=target_platform_capabilities,
                                                                      target_resource_utilization=target_resource_utilization,
                                                                      tb_w=tb_w)
+        float_graph = copy.deepcopy(graph)
         # ---------------------- #
         # GPTQ Runner
@@ -192,7 +195,12 @@ if FOUND_TORCH:
                                  hessian_info_service=hessian_info_service)
         if core_config.debug_config.analyze_similarity:
-            analyzer_model_quantization(representative_data_gen, tb_w, graph_gptq, fw_impl, DEFAULT_PYTORCH_INFO)
+            analyzer_model_quantization(representative_data_gen,
+                                        tb_w,
+                                        float_graph,
+                                        graph_gptq,
+                                        fw_impl,
+                                        DEFAULT_PYTORCH_INFO)
         return get_exportable_pytorch_model(graph_gptq)

model_compression_toolkit/ptq/keras/quantization_facade.py CHANGED Viewed

@@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 from typing import Callable
 from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.analyzer import analyzer_model_quantization
+from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
 from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF
@@ -122,8 +124,8 @@ if FOUND_TF:
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.critical("Given quantization config to mixed-precision facade is not of type "
-                                    "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization "
-                                    "API, or pass a valid mixed precision configuration.")  # pragma: no cover
+                                "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization "
+                                "API, or pass a valid mixed precision configuration.")  # pragma: no cover
         tb_w = init_tensorboard_writer(fw_info)
@@ -139,15 +141,30 @@ if FOUND_TF:
                                                target_resource_utilization=target_resource_utilization,
                                                tb_w=tb_w)
-        tg = ptq_runner(tg, representative_data_gen, core_config, fw_info, fw_impl, tb_w)
+        # At this point, tg is a graph that went through substitutions (such as BN folding) and is
+        # ready for quantization (namely, it holds quantization params, etc.) but the weights are
+        # not quantized yet. For this reason, we use it to create a graph that acts as a "float" graph
+        # for things like similarity analyzer (because the quantized and float graph should have the same
+        # architecture to find the appropriate compare points for similarity computation).
+        similarity_baseline_graph = copy.deepcopy(tg)
+        graph_with_stats_correction = ptq_runner(tg,
+                                                 representative_data_gen,
+                                                 core_config,
+                                                 fw_info,
+                                                 fw_impl,
+                                                 tb_w)
         if core_config.debug_config.analyze_similarity:
+            quantized_graph = quantize_graph_weights(graph_with_stats_correction)
             analyzer_model_quantization(representative_data_gen,
-                                        tb_w, tg,
+                                        tb_w,
+                                        similarity_baseline_graph,
+                                        quantized_graph,
                                         fw_impl,
                                         fw_info)
-        return get_exportable_keras_model(tg)
+        return get_exportable_keras_model(graph_with_stats_correction)

model_compression_toolkit/ptq/pytorch/quantization_facade.py CHANGED Viewed

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 from typing import Callable
 from model_compression_toolkit.core import common
@@ -26,6 +28,7 @@ from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quant
 from model_compression_toolkit.core.runner import core_runner
 from model_compression_toolkit.ptq.runner import ptq_runner
 from model_compression_toolkit.core.analyzer import analyzer_model_quantization
+from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
 if FOUND_TORCH:
@@ -90,14 +93,16 @@ if FOUND_TORCH:
         """
+        fw_info = DEFAULT_PYTORCH_INFO
         if core_config.mixed_precision_enable:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.critical("Given quantization config to mixed-precision facade is not of type "
-                             "MixedPrecisionQuantizationConfig. Please use "
-                             "pytorch_post_training_quantization API, or pass a valid mixed precision "
-                             "configuration.")  # pragma: no cover
+                                "MixedPrecisionQuantizationConfig. Please use "
+                                "pytorch_post_training_quantization API, or pass a valid mixed precision "
+                                "configuration.")  # pragma: no cover
-        tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
+        tb_w = init_tensorboard_writer(fw_info)
         fw_impl = PytorchImplementation()
@@ -105,22 +110,36 @@ if FOUND_TORCH:
         tg, bit_widths_config, _ = core_runner(in_model=in_module,
                                                representative_data_gen=representative_data_gen,
                                                core_config=core_config,
-                                               fw_info=DEFAULT_PYTORCH_INFO,
+                                               fw_info=fw_info,
                                                fw_impl=fw_impl,
                                                tpc=target_platform_capabilities,
                                                target_resource_utilization=target_resource_utilization,
                                                tb_w=tb_w)
-        tg = ptq_runner(tg, representative_data_gen, core_config, DEFAULT_PYTORCH_INFO, fw_impl, tb_w)
+        # At this point, tg is a graph that went through substitutions (such as BN folding) and is
+        # ready for quantization (namely, it holds quantization params, etc.) but the weights are
+        # not quantized yet. For this reason, we use it to create a graph that acts as a "float" graph
+        # for things like similarity analyzer (because the quantized and float graph should have the same
+        # architecture to find the appropriate compare points for similarity computation).
+        similarity_baseline_graph = copy.deepcopy(tg)
+        graph_with_stats_correction = ptq_runner(tg,
+                                                 representative_data_gen,
+                                                 core_config,
+                                                 fw_info,
+                                                 fw_impl,
+                                                 tb_w)
         if core_config.debug_config.analyze_similarity:
+            quantized_graph = quantize_graph_weights(graph_with_stats_correction)
             analyzer_model_quantization(representative_data_gen,
                                         tb_w,
-                                        tg,
+                                        similarity_baseline_graph,
+                                        quantized_graph,
                                         fw_impl,
-                                        DEFAULT_PYTORCH_INFO)
+                                        fw_info)
-        return get_exportable_pytorch_model(tg)
+        return get_exportable_pytorch_model(graph_with_stats_correction)
 else:

{mct_nightly-2.0.0.20240408.430.dist-info → mct_nightly-2.0.0.20240410.422.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-2.0.0.20240408.430.dist-info → mct_nightly-2.0.0.20240410.422.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-2.0.0.20240408.430.dist-info → mct_nightly-2.0.0.20240410.422.dist-info}/top_level.txt RENAMED Viewed

File without changes

mct-nightly 2.0.0.20240408.430__py3-none-any.whl → 2.0.0.20240410.422__py3-none-any.whl

mct-nightly 2.0.0.20240408.430py3-none-any.whl → 2.0.0.20240410.422py3-none-any.whl