mct-nightly 2.2.0.20241224.532__py3-none-any.whl → 2.2.0.20241230.534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/RECORD +21 -21
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/common/fusion/graph_fuser.py +8 -7
  5. model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +6 -6
  6. model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py +14 -8
  7. model_compression_toolkit/core/common/graph/memory_graph/memory_element.py +6 -1
  8. model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py +14 -2
  9. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +47 -13
  10. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +55 -8
  11. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +1 -1
  12. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +89 -2
  13. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +2 -2
  14. model_compression_toolkit/core/keras/data_util.py +4 -5
  15. model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py +1 -1
  16. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py +3 -3
  17. model_compression_toolkit/core/pytorch/pytorch_implementation.py +3 -2
  18. model_compression_toolkit/core/pytorch/reader/graph_builders.py +8 -6
  19. {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/LICENSE.md +0 -0
  20. {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/WHEEL +0 -0
  21. {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mct-nightly
-Version: 2.2.0.20241224.532
+Version: 2.2.0.20241230.534
 Summary: A Model Compression Toolkit for neural networks
 Home-page: UNKNOWN
 License: UNKNOWN
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/RECORD
@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=9suCm_ya-q7binwaiEyGExSDb8bJgOWwJ3wBnV_el2Y,1573
+model_compression_toolkit/__init__.py,sha256=iPjPjwwH50JpoFzcLJDS6XEjBVsJQmsuUenuZxUXTPg,1573
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -29,7 +29,7 @@ model_compression_toolkit/core/common/collectors/mean_collector.py,sha256=mjr3U_
 model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py,sha256=5oKsJEKdVmj4C7fKdHhmrFN5k4G2BaFETpmf_xKNs7s,5207
 model_compression_toolkit/core/common/collectors/statistics_collector.py,sha256=vcf7Pk1v09SJC4fbAWf_8AgTktE6tPizJbQpSmocP2U,7930
 model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=5VkHB2fW0ohfPQmISz6o4fCMV8QyFdj5_kU51lN0JS8,6214
+model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=b41_4rL_Adiza4vpWlmmqgvkpUmWVdfdx0nEIB0p2n8,6195
 model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
 model_compression_toolkit/core/common/graph/base_graph.py,sha256=6jlwj4WDT3XJKf1QQ4Aougswhl-Xx51QzV58CePbjVg,37818
@@ -41,11 +41,11 @@ model_compression_toolkit/core/common/graph/graph_searches.py,sha256=2oKuW6L8hP-
 model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py,sha256=3el-A7j1oyoo1_9zq3faQp7IeRsFXFCvnrb3zZFXpU0,9803
 model_compression_toolkit/core/common/graph/memory_graph/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py,sha256=X6FK3C3y8ixFRPjC_wm3ClloCX8_06SOdA1TRi7o_LA,3800
-model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256=-bVPbzMMaDpbacjFOafBsxbmJFHaD4tE8IAHobLzop4,2858
+model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256=7KbAQ21mToemAjmpsUKknRzoljOaQ62VTxUSsLTSskU,2877
 model_compression_toolkit/core/common/graph/memory_graph/cut.py,sha256=aPdXJPP5a5Rnu5Z5XqTZZkuGtdgHVu0RmX_NOfNM6Tc,2470
-model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=crV2NCLVO8jx9MlryZBYuJKFe_G9HfM7rUR64fDymlw,17045
-model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=gRmBEFRmyJsNKezQfiwDwQu1cmbGd2wgKCRTH6iw8mw,3961
-model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=gw4av_rzn_3oEAPpD3B7PHZDqnxHMjIESevl6ppPnkk,7175
+model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=gOVnHt0JaZvyuA0T1oTFSR59cwFO46E5OOC53h19nvg,17839
+model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=ISD2BvJWj5mB91jrFjG8VQb0oOoLBoita_thCZWzCPI,4238
+model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=3OC8kMXuzBv-R7wWmKY-i1AQNAr5x3LBZ4aj7hHF-cQ,7791
 model_compression_toolkit/core/common/hessian/__init__.py,sha256=E7LK3K_1AwMCQokanNc1JODMwUKNOKmwXQiGQ7GO10I,1033
 model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=YynbVHdHH2gPlk1QHXH6GygIkXRZ9qxR14cpgKrHPT0,13238
 model_compression_toolkit/core/common/hessian/hessian_info_utils.py,sha256=1axmN0tjJSo_7hUr2d2KMv4y1pBi19cqWSQpi4BbdsA,1458
@@ -66,18 +66,18 @@ model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha2
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=15PbLAfuIyQInFczPka_MuyO4AJzAaOm9bOi3bzllxc,4531
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=B7xLl8P5eCz0_fBxocDlNiv6k-3MdfMUk2GjYKl2p5k,7522
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=hlaV4ybreT0DR4ftLSPg5KTit3BEm9dWA7Y8NHpEJ8w,37532
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=UGoIp5Fb8WwZpBSrEr_KO7SRdhSg5XuZq2ZzjL1ILuc,39296
 model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=adjuvrJcan7Ua3nYlJX7T6qGkCRHGqWMaM5-099a9Us,27220
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=cjmHFU4peJ6qYP8lsIkYYSLvRddDbiSQ6mPZnZy0p6U,7905
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=MtPkZfPIJWI191Hbjp6JluUyLnqiJRi3zNf-CqVNuag,5053
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=Wu89Rl6gAB5vL5l8jPH-4GFeKG41jusAb_yiHQ9Sjxs,14978
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=zVdOL80tbVAGUBT-JzeyBNGXASmutJTCTW0G6AQz7WY,17319
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py,sha256=PmuVXCKgwRNvG7pLGdA24Ren1lFH5hW51_FrOmUVHwU,4199
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=mOxZwOQYnOwSJMiapEEH9o-89ujJdPxSl8zXpnApc0U,1850
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=WC1EHoNuo_lrzy4NRhGJ1cgmJ2IsFsbmP86mrVO3AVA,21506
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=Z-cFOGUysk33OQgxZrmqn6dvMorR4m3xTgxjuLkplbs,1850
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=wctoYZE2vskmlPKvGMZ3UVBhyKQVP362crh_k0D_Bx0,25538
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=YsA2CVrGt_VGJzZ9TMqPtz5b1YX_jb-Qfb9QfV-RXsc,16568
+model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=c_msFU7zoBpkcT9_-P-OLyPTDd9hZEdFjFUQ0Y9pLaY,16574
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -155,7 +155,7 @@ model_compression_toolkit/core/common/visualization/tensorboard_writer.py,sha256
 model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiOcnLAq1v0MMBCJqMJzzFk,3225
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
-model_compression_toolkit/core/keras/data_util.py,sha256=sTEuHUrT8S3CpeAEG0XDlYA0bWZKISGPilObPlO0TA8,6833
+model_compression_toolkit/core/keras/data_util.py,sha256=HQj3-GP5oT5JHpYt80mtKhZjTCvKYs6c3Ll0txEgKHQ,6892
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=PYcER89eEXjKtR0T7-2Y4f7cckqoD5OQbpHePoRkMec,5030
 model_compression_toolkit/core/keras/keras_implementation.py,sha256=HwbIR7x4t-TBNbWHVvVNFk8z-KFt6zM0LWAUXQuNZrk,31753
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
@@ -176,7 +176,7 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_reconstruction.py,sha256=GR1a3mCZpNUu4WxixJXF_aSm57phAdxaRoHecNx3hxw,3168
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_refusing.py,sha256=5df_xGfXkqNub4xVRnCWQvSohWqdv12axjJ6edVU2H0,2478
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/concat_threshold_update.py,sha256=Hl4LEQ_bw_Vpmf3ZqHujYUqVdvTNsPlEMvr9dZhwg2U,2806
-model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py,sha256=RwzqSksGNmN1KPH8RTJzpCSjGgxvtT9kqqPqsjbGPqs,11631
+model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py,sha256=YHEh3rtTD61doT_oz8Tw7fg5AKKHxXvbpW_GmVbSUVw,11651
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py,sha256=R3U7cjc2E0zheMem16GHygp5jZFGSaomkNOTxTjcAgw,5794
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py,sha256=V6hp67CkS_A3WqdsjLjs0ETtdZAOo4P9mhy4aT7W5FE,5940
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py,sha256=AvquvVVVT8-ioeVn-gjqysK4L41L3I7TlNOEDfWjViY,8185
@@ -223,7 +223,7 @@ model_compression_toolkit/core/pytorch/constants.py,sha256=YwD_joIF0vK8UG2vW1NVv
 model_compression_toolkit/core/pytorch/data_util.py,sha256=YYbT135HhlTt0q6XdD2JX7AS_L92f_uV2rWq2hsJOCA,6325
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-Vls1P_8Ckm_18nnOsmQkZ71SmzHwtQLbQ383Z4Rb-U,4365
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=Xwt7eHS-QJJc1fyOrxL2tz8E2CP-b2M0_R-Dgb1Gm-4,29558
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=Mfdq15JLKWAAkpnOt_urcPOVXhqONTvzORyLOG-_Klo,29659
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=xpKj99OZKT9NT0vKIl_cOe8d89d2gef1gKoNT6PFElE,4989
 model_compression_toolkit/core/pytorch/utils.py,sha256=7VbgcLwtQvdEEc_AJgSOQ3U3KRKCICFPaBirN1fIQxg,3940
@@ -254,7 +254,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py,sha256=hAZXzrEinHa-dJHLj39Hy_9Q-13QyO95rtYVSLrhvT8,4915
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py,sha256=DcJEIkGvBdIMOelNIwaJUZ5UsAHiGnDJPR20I464vWo,2929
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py,sha256=XFtU9yuBmoZlX0f0mS6otMPWMk-RcWs94XdvvTNhW8Y,3303
-model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=ziL7jwTnjzTf7BHPRPYgWBSCUrSXSyjZnvQqsJhD1nM,12466
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=SBtIuxb1Q2oUMJKSrAyN2wuaY4k1tsKt7qql0dP_PE0,12473
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py,sha256=3WCLvPyx7tVkM0rwYhYq-gntCzW9R_DcImR1ucKlPac,10772
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/softmax_shift.py,sha256=05lV4pIL3hJkZl4JQPV4wk_EFD0eYLG5b8cdzvZk4P8,1588
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/transform_function_call_method.py,sha256=EC9Dvp-_UlpDWnipnf8ds65wh_Y-T8pXAFIwRScWpiY,2044
@@ -273,7 +273,7 @@ model_compression_toolkit/core/pytorch/quantizer/__init__.py,sha256=Rf1RcYmelmdZ
 model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py,sha256=D8_CEuFqKAhbUgKaRw7Jlxo0zlqgPTMu6CIIIM4LfS0,7045
 model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py,sha256=uyeBtNokyDUikk-YkDP_mN_2DX0J5oPm3kSfdSUT2Ck,4420
 model_compression_toolkit/core/pytorch/reader/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=mo1NIYXxiAigbTZvNgQeLi6vzLn0RqU0RxcxZKE27cE,19335
+model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=RBNhPuz02kstVVIDibHUES_Skn9feg3gOGbQylM8h-A,19547
 model_compression_toolkit/core/pytorch/reader/node_holders.py,sha256=7XNc7-l1MZPJGcOESvtAwfIMxrU6kvt3YjF5B7qOqK4,1048
 model_compression_toolkit/core/pytorch/reader/reader.py,sha256=GEJE0QX8XJFWbYCkbRBtzttZtmmuoACLx8gw9KyAQCE,6015
 model_compression_toolkit/core/pytorch/statistics_correction/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
@@ -560,8 +560,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.2.0.20241224.532.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-mct_nightly-2.2.0.20241224.532.dist-info/METADATA,sha256=TevkRWHqm2UgHf34bwK7NWHCKt4tIUfdOvpVaA4-CIU,26453
-mct_nightly-2.2.0.20241224.532.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-mct_nightly-2.2.0.20241224.532.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.2.0.20241224.532.dist-info/RECORD,,
+mct_nightly-2.2.0.20241230.534.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20241230.534.dist-info/METADATA,sha256=yyeiq5zHdWmEdPcLFzs1V6GQR86hrjTPm05Nbj-rGCs,26453
+mct_nightly-2.2.0.20241230.534.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+mct_nightly-2.2.0.20241230.534.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.2.0.20241230.534.dist-info/RECORD,,
model_compression_toolkit/__init__.py
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.2.0.20241224.000532"
+__version__ = "2.2.0.20241230.000534"
model_compression_toolkit/core/common/fusion/graph_fuser.py
@@ -36,10 +36,10 @@ class GraphFuser:
         The fusion process involves:
             1. Creating new fused nodes to represent these groups.
             2. Updating the graph structure to replace the original nodes with fused nodes.
-            3. Maintaining mapping mapping of original node names to their fused node names.
+            3. Maintaining mapping of original node names to their fused node names.
 
         Args:
-            graph: Graph to sue its nodes.
+            graph: Graph to fuse its nodes.
 
         Returns:
             Mapping of original node names to their fused node names
@@ -54,7 +54,8 @@ class GraphFuser:
             fused_nodes_mapping[node.name] = new_fused_node.name
         return fused_nodes_mapping
 
-    def _create_fused_node(self, nodes: List[BaseNode]) -> BaseNode:
+    @staticmethod
+    def _create_fused_node(nodes: List[BaseNode]) -> BaseNode:
         """
         Create a new node that represents the fusion of the given nodes.
 
@@ -79,10 +80,10 @@ class GraphFuser:
 
         return fused_node
 
-    def _replace_nodes_with_fused_node(self,
-                                       graph: Graph,
-                                       nodes_to_fuse: List[BaseNode],
-                                       fused_node: BaseNode):
+    @staticmethod
+    def _replace_nodes_with_fused_node(graph: Graph,
+                                       nodes_to_fuse: List[BaseNode],
+                                       fused_node: BaseNode):
         """
         Replace the specified nodes in the graph with a new fused node.
 
model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
@@ -51,13 +51,13 @@ def compute_graph_max_cut(memory_graph: MemoryGraph,
         estimate = (u_bound + l_bound) / 2
         schedule, max_cut_size, cuts = max_cut_astar.solve(estimate_factor=estimate, iter_limit=astar_n_iter)
         if schedule is None:
-            return last_result
+            l_bound = estimate
+        else:
+            u_bound = min(estimate, max_cut_size)
+            last_result = (schedule, max_cut_size, cuts)
 
-        next_u_bound = min(estimate, max_cut_size)
-        last_result = (schedule, max_cut_size, cuts)
-
-        if l_bound * (1 + eps) >= next_u_bound:
-            return last_result
+            if l_bound * (1 + eps) >= u_bound:
+                return last_result
 
         it += 1
model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py
@@ -154,6 +154,9 @@ class MaxCutAstar:
             cut_route = routes[next_cut]
 
             if next_cut == self.target_cut:
+                # TODO maxcut: Why do we filter the cuts (cut_route) but not the max cut size (cut_sost).
+                #  This is a mismatch between max_cut and max(cuts).
+                #  Also, unfiltered cut_route seems perfect, including input and output tensor sizes of current op.
                 return self._remove_dummys_from_path(cut_route[0].op_order), cut_cost,\
                        list(set([self._remove_dummys_from_cut(self.clean_memory_for_next_step(c)) for c in cut_route]))
 
@@ -178,7 +181,8 @@ class MaxCutAstar:
                 cost = self.accumulate(cut_cost, c.memory_size())
                 if c not in open_list:
                     self._update_expanded_node(c, cost, cut_route, open_list, costs, routes)
-                elif self.ordering(cost, costs[c]):
+                # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+                elif self.ordering(cost, costs[c]):  # pragma: no cover
                     # If we already saw this cut during the search with a larger cost, then we want to update the order
                     # of the schedule in the cut
                     # Remove call - removes the cut with the same memory elements but different ordering from open
@@ -187,7 +191,8 @@ class MaxCutAstar:
                     self._update_expanded_node(c, cost, cut_route, open_list, costs, routes)
 
         # Halt or No Solution
-        return None, 0, None
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        return None, 0, None  # pragma: no cover
 
     @staticmethod
     def _update_expanded_node(cut: Cut, cost: float, route: List[Cut], open_list: List[Cut],
@@ -223,8 +228,7 @@ class MaxCutAstar:
 
         """
         ordered_cuts_list = sorted(open_list,
-                                   key=lambda c: (self.accumulate(costs[c], self.estimate(c, estimate_factor)), len(routes[c])),
-                                   reverse=False)
+                                   key=lambda c: (self.accumulate(costs[c], self.estimate(c, estimate_factor)), -len(routes[c])))
 
         assert len(ordered_cuts_list) > 0
         return ordered_cuts_list[0]
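Note: the redundant `reverse=False` is folded into the tuple key, and the tie-break flips: among cuts with equal estimated cost, `-len(routes[c])` now prefers the cut with the longer route (more scheduling progress) instead of the shorter one. The same sorting pattern in isolation, with made-up data:

    cuts = [("cutA", 5.0, 2), ("cutB", 5.0, 7), ("cutC", 3.0, 1)]  # (name, cost, route_len)
    ordered = sorted(cuts, key=lambda c: (c[1], -c[2]))
    # ordered == [cutC, cutB, cutA]: lowest cost first; at equal cost the
    # longer route sorts first because its negated length is smaller.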
@@ -349,7 +353,8 @@ class MaxCutAstar:
        Returns: True if the first cost is smaller than the second one, else otherwise.
 
        """
-        return cost_1 < cost_2
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        return cost_1 < cost_2  # pragma: no cover
 
    def estimate(self, cut: Cut, estimate_factor: float) -> float:
        """
@@ -377,9 +382,10 @@ class MaxCutAstar:
        Returns: An initial estimate value.
 
        """
-        l_bound = memory_graph.memory_lbound_single_op
-        u_bound = 2 * sum([t.total_size for t in memory_graph.b_nodes]) - l_bound
-        return (u_bound + l_bound) / 2
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        l_bound = memory_graph.memory_lbound_single_op  # pragma: no cover
+        u_bound = 2 * sum([t.total_size for t in memory_graph.b_nodes]) - l_bound  # pragma: no cover
+        return (u_bound + l_bound) / 2  # pragma: no cover
 
    @staticmethod
    def _remove_dummys_from_path(path: List[BaseNode]) -> List[BaseNode]:
model_compression_toolkit/core/common/graph/memory_graph/memory_element.py
@@ -30,7 +30,12 @@ class ActivationMemoryTensor:
             init_size_to_zero: Whether to initialize the memory tensor size to 0 or not.
         """
 
-        self.shape = shape[1:]  # remove batch size (first element) from output shape
+        # remove batch size (first element) from output shape. If the shape is a list then remove the first
+        # axis. If shape a vector (e.g. output of size) then set the shape minus 1 to ignore the batch value.
+        if len(shape) == 1:
+            self.shape = [] if shape[0] is None else [shape[0] - 1]
+        else:
+            self.shape = shape[1:]
         # The total size of a tensor is considered to be the number of elements in the tensor
         self.total_size = self._get_tensor_total_size() if not init_size_to_zero else 0
 
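Note: the new length-1 branch appears to pair with the torch.Size handling added to graph_builders.py later in this diff, where a size() output is recorded as a one-element "shape" holding the tensor's rank; `[shape[0] - 1]` then discounts the batch axis from that rank. A hedged illustration of just the branching (the constructor itself is MCT-internal):

    def strip_batch(shape):
        # A length-1 "shape" is treated as a tensor rank (e.g. the output of
        # torch.Tensor.size()), so subtract one entry for the batch axis;
        # a real shape list simply drops its first (batch) dimension.
        if len(shape) == 1:
            return [] if shape[0] is None else [shape[0] - 1]
        return shape[1:]

    assert strip_batch([4]) == [3]                     # size() of a rank-4 tensor
    assert strip_batch([8, 3, 224, 224]) == [3, 224, 224]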
model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from typing import List
+from operator import getitem
 
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.graph.edge import EDGE_SOURCE_INDEX
@@ -45,7 +46,8 @@ class MemoryGraph(DirectedBipartiteGraph):
         tensor_to_node = []
 
         for n in nodes:
-            n_outputs = [n.output_shape] if isinstance(n.output_shape, tuple) else n.output_shape
+            n_outputs = n.output_shape if isinstance(n.output_shape[0], (tuple, list)) else [n.output_shape]
+
             out_edges = model_graph.out_edges(n, sort_by_attr=EDGE_SOURCE_INDEX)
 
             for i, ot in enumerate(n_outputs):
@@ -54,7 +56,16 @@ class MemoryGraph(DirectedBipartiteGraph):
                 # Add memory tensor as current node's output
                 node_to_tensor.append((n, memory_tensor))
 
-                ot_edges = [oe for oe in out_edges if oe.source_index == i]
+                # TODO maxcut: refactor this code. it handles split->getitem generated by fx.
+                ot_edges = []
+                for oe in out_edges:
+                    if oe.sink_node.type is getitem and len(oe.sink_node.op_call_args) == 1 and isinstance(oe.sink_node.op_call_args[0], int):
+                        source_index = oe.sink_node.op_call_args[0]
+                    else:
+                        source_index = oe.source_index
+                    if source_index == i:
+                        ot_edges.append(oe)
+
                 for oe in ot_edges:
                     # Add current memory tensor as input to current node's successors
                     tensor_to_node.append((memory_tensor, oe.sink_node))
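Note: torch.fx traces a multi-output op like torch.split as a single call node whose consumers are `operator.getitem` nodes carrying the output index as an argument; the new loop recovers the true source index from that argument. A small trace showing where the index lives (assumes a working torch install; MCT stores the index in `op_call_args`, fx keeps it in the node's args):

    import operator
    import torch
    from torch.fx import symbolic_trace

    class M(torch.nn.Module):
        def forward(self, x):
            parts = torch.split(x, 2, dim=1)
            return parts[0] + parts[1]

    gm = symbolic_trace(M())
    for node in gm.graph.nodes:
        if node.target is operator.getitem:
            print(node.name, node.args[1])  # prints the output indices 0 and 1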
@@ -71,6 +82,7 @@ class MemoryGraph(DirectedBipartiteGraph):
         inputs_tensors_memory = [sum([t.total_size for t in self.operation_node_children(n)])
                                  for n in nodes if n in model_graph.get_inputs()]
 
+        # TODO maxcut: why both inputs and outputs of each nodes, while the A* solves for node outputs only???
         nodes_total_memory = [sum([t.total_size for t in self.operation_node_children(n)] +
                                   [t.total_size for t in self.operation_node_parents(n)])
                               for n in nodes if n not in model_graph.get_inputs()]
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -24,8 +24,10 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import RuFunctions
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric, calc_graph_cuts
+from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import Cut
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 
@@ -40,7 +42,7 @@ class MixedPrecisionSearchManager:
                  fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
-                 ru_functions: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]],
+                 ru_functions: Dict[RUTarget, RuFunctions],
                  target_resource_utilization: ResourceUtilization,
                  original_graph: Graph = None):
         """
@@ -65,8 +67,11 @@ class MixedPrecisionSearchManager:
         self.sensitivity_evaluator = sensitivity_evaluator
         self.layer_to_bitwidth_mapping = self.get_search_space()
         self.compute_metric_fn = self.get_sensitivity_metric()
+        self._cuts = None
 
-        self.compute_ru_functions = ru_functions
+        ru_types = [ru_target for ru_target, ru_value in
+                    target_resource_utilization.get_resource_utilization_dict().items() if ru_value < np.inf]
+        self.compute_ru_functions = {ru_target: ru_fn for ru_target, ru_fn in ru_functions.items() if ru_target in ru_types}
         self.target_resource_utilization = target_resource_utilization
         self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
@@ -76,6 +81,17 @@ class MixedPrecisionSearchManager:
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
                                                                        original_graph=self.original_graph)
 
+    @property
+    def cuts(self) -> List[Cut]:
+        """
+        Calculates graph cuts. Written as property, so it will only be calculated once and
+        only if cuts are needed.
+
+        """
+        if self._cuts is None:
+            self._cuts = calc_graph_cuts(self.original_graph)
+        return self._cuts
+
     def get_search_space(self) -> Dict[int, List[int]]:
         """
         The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces
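Note: the `cuts` property is a hand-rolled lazy cache: computing max-cuts is expensive, so it runs at most once and only if an activation target actually needs it. Since Python 3.8 the standard library offers the same behavior, a possible simplification rather than what the diff does:

    from functools import cached_property

    class Example:
        @cached_property
        def cuts(self):
            print("computing once")   # stand-in for calc_graph_cuts(...)
            return [1, 2, 3]

    e = Example()
    e.cuts  # prints "computing once"
    e.cuts  # served from the instance cache, no recomputation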
@@ -106,6 +122,21 @@ class MixedPrecisionSearchManager:
 
         return self.sensitivity_evaluator.compute_metric
 
+    def _calc_ru_fn(self, ru_target, ru_fn, mp_cfg) -> np.ndarray:
+        """
+        Computes a resource utilization for a certain mixed precision configuration.
+        The method computes a resource utilization vector for specific target resource utilization.
+
+        Returns: resource utilization value.
+
+        """
+        # ru_fn is a pair of resource utilization computation method and
+        # resource utilization aggregation method (in this method we only need the first one)
+        if ru_target is RUTarget.ACTIVATION:
+            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl, self.cuts)
+        else:
+            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl)
+
     def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
         """
         Computes a resource utilization vector with the values matching to the minimal mp configuration
@@ -118,10 +149,10 @@ class MixedPrecisionSearchManager:
 
         """
         min_ru = {}
-        for ru_target, ru_fns in self.compute_ru_functions.items():
-            # ru_fns is a pair of resource utilization computation method and
+        for ru_target, ru_fn in self.compute_ru_functions.items():
+            # ru_fns is a pair of resource utilization computation method and
             # resource utilization aggregation method (in this method we only need the first one)
-            min_ru[ru_target] = ru_fns[0](self.min_ru_config, self.graph, self.fw_info, self.fw_impl)
+            min_ru[ru_target] = self._calc_ru_fn(ru_target, ru_fn, self.min_ru_config)
 
         return min_ru
 
@@ -212,7 +243,7 @@ class MixedPrecisionSearchManager:
 
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        return self.compute_ru_functions[target].metric_fn(cfg, self.graph, self.fw_info, self.fw_impl)
+        return self._calc_ru_fn(target, self.compute_ru_functions[target], cfg)
 
     @staticmethod
     def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -241,13 +272,15 @@ class MixedPrecisionSearchManager:
         """
 
         non_conf_ru_dict = {}
-        for target, ru_value in self.target_resource_utilization.get_resource_utilization_dict().items():
+        for target, ru_fns in self.compute_ru_functions.items():
             # Call for the ru method of the given target - empty quantization configuration list is passed since we
             # compute for non-configurable nodes
             if target == RUTarget.BOPS:
                 ru_vector = None
+            elif target == RUTarget.ACTIVATION:
+                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl, self.cuts)
             else:
-                ru_vector = self.compute_ru_functions[target].metric_fn([], self.graph, self.fw_info, self.fw_impl)
+                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl)
 
             non_conf_ru_dict[target] = ru_vector
 
@@ -266,14 +299,15 @@ class MixedPrecisionSearchManager:
         """
 
         ru_dict = {}
-
         for ru_target, ru_fns in self.compute_ru_functions.items():
             # Passing False to ru methods and aggregations to indicates that the computations
             # are not for constraints setting
             if ru_target == RUTarget.BOPS:
-                configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl, False)
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl, False)
+            elif ru_target == RUTarget.ACTIVATION:
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.graph, self.fw_info, self.fw_impl, self.cuts)
             else:
-                configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl)
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl)
             non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
             if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
                 ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
@@ -647,7 +681,7 @@ class ConfigReconstructionHelper:
                 # It's ok, need to find the node's configuration
                 self.retrieve_weights_activation_config(activation_node, weights_node, virtual_node, virtual_cfg_idx, virtual_mp_cfg)
             else:
-                Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{n.name}' to have multiple outputs when not composed with an activation node.")  # pragma: no cover
+                Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{weights_node.name}' to have multiple outputs when not composed with an activation node.")  # pragma: no cover
 
     def update_config_at_original_idx(self, n: BaseNode, origin_cfg_idx: int):
         """
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
@@ -13,10 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 import copy
+from collections import defaultdict
 
 import numpy as np
 from typing import Callable, Any, Dict, Tuple
 
+from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import FLOAT_BITWIDTH, BITS_TO_BYTES
 from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph
@@ -25,6 +27,7 @@ from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import calc_graph_cuts
 
 
 def compute_resource_utilization_data(in_model: Any,
@@ -76,7 +79,7 @@ def compute_resource_utilization_data(in_model: Any,
     total_weights_params = 0 if len(weights_params) == 0 else sum(weights_params)
 
     # Compute max activation tensor
-    activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_sizes(graph=transformed_graph)
+    activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_maxcut_sizes(graph=transformed_graph)
     max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
 
     # Compute total memory utilization - parameters sum + max activation tensor
@@ -132,7 +135,52 @@ def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> Tuple[
 
     return np.array(weights_memory_bytes), np.array(weights_params)
 
-def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
+
+def compute_activation_output_maxcut_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Computes an array of the respective output tensor maxcut size and an array of the output tensor
+    cut size in bytes for each cut.
+
+    Args:
+        graph: A finalized Graph object, representing the model structure.
+
+    Returns:
+        A tuple containing two arrays:
+            - The first is an array of the size of each activation max-cut size in bytes, calculated
+              using the maximal bit-width for quantization.
+            - The second array an array of the size of each activation max-cut activation size in number of parameters.
+
+    """
+    cuts = calc_graph_cuts(graph)
+
+    # map nodes to cuts.
+    node_to_cat_mapping = defaultdict(list)
+    for i, cut in enumerate(cuts):
+        mem_element_names = [m.node_name for m in cut.mem_elements.elements]
+        for m_name in mem_element_names:
+            if len(graph.find_node_by_name(m_name)) > 0:
+                node_to_cat_mapping[m_name].append(i)
+            else:
+                Logger.critical(f"Missing node: {m_name}")  # pragma: no cover
+
+    activation_outputs = np.zeros(len(cuts))
+    activation_outputs_bytes = np.zeros(len(cuts))
+    for n in graph.nodes:
+        # Go over all nodes that have activation quantization enabled.
+        if n.has_activation_quantization_enabled_candidate():
+            # Fetch maximum bits required for activations quantization.
+            max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
+            node_output_size = n.get_total_output_params()
+            for cut_index in node_to_cat_mapping[n.name]:
+                activation_outputs[cut_index] += node_output_size
+                # Calculate activation size in bytes and append to list
+                activation_outputs_bytes[cut_index] += node_output_size * max_activation_bits / BITS_TO_BYTES
+
+    return activation_outputs_bytes, activation_outputs
+
+
+# TODO maxcut: add test for this function and remove no cover
+def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:  # pragma: no cover
     """
     Computes an array of the respective output tensor size and an array of the output tensor size in bytes for
     each node.
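Note: per cut, the byte size accumulates params * max_bits / 8 over every live tensor. A small numeric check of that arithmetic (BITS_TO_BYTES is the bits-per-byte constant, 8, in MCT's constants module):

    BITS_TO_BYTES = 8
    cut_tensors = [(1000, 8), (500, 4)]  # (num_params, max_activation_bits)
    cut_bytes = sum(p * b / BITS_TO_BYTES for p, b in cut_tensors)
    assert cut_bytes == 1000 * 1.0 + 500 * 0.5 == 1250.0  # 1000 B + 250 B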
@@ -146,9 +194,7 @@ def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarra
           calculated using the maximal bit-width for quantization.
         - The second array represents the size of each node's activation output tensor size.
 
-
     """
-
     activation_outputs = []
     activation_outputs_bytes = []
     for n in graph.nodes:
@@ -238,16 +284,17 @@ def requires_mixed_precision(in_model: Any,
     total_weights_memory_bytes = 0 if len(weights_memory_by_layer_bytes) == 0 else sum(weights_memory_by_layer_bytes)
 
     # Compute max activation tensor in bytes
-    activation_output_sizes_bytes, _ = compute_activation_output_sizes(transformed_graph)
-    max_activation_tensor_size_bytes = 0 if len(activation_output_sizes_bytes) == 0 else max(activation_output_sizes_bytes)
+    activation_memory_estimation_bytes, _ = compute_activation_output_maxcut_sizes(transformed_graph)
+    max_activation_memory_estimation_bytes = 0 if len(activation_memory_estimation_bytes) == 0 \
+        else max(activation_memory_estimation_bytes)
 
     # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
     bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
     bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
 
     is_mixed_precision |= target_resource_utilization.weights_memory < total_weights_memory_bytes
-    is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_tensor_size_bytes
-    is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_tensor_size_bytes
+    is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_memory_estimation_bytes
+    is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_memory_estimation_bytes
     is_mixed_precision |= target_resource_utilization.bops < bops_count
     return is_mixed_precision
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py
@@ -28,6 +28,6 @@ class RuFunctions(NamedTuple):
 
 
 ru_functions_mapping = {RUTarget.WEIGHTS: RuFunctions(MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
-                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
+                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_MAXCUT_SIZE, MpRuAggregation.MAX),
                         RUTarget.TOTAL: RuFunctions(MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
                         RUTarget.BOPS: RuFunctions(MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
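Note: this one-entry swap is what reroutes the ACTIVATION target to the new max-cut metric. Relatedly, the search-manager hunks above replace positional access (`ru_fns[0]`) with field access (`ru_fns.metric_fn`); a NamedTuple supports both, but names survive field reordering. A minimal illustration (the field names match RuFunctions as it appears in this diff; the callables are placeholders):

    from typing import Callable, NamedTuple

    class RuFunctions(NamedTuple):
        metric_fn: Callable
        aggregate_fn: Callable

    fns = RuFunctions(metric_fn=min, aggregate_fn=max)
    assert fns[0] is fns.metric_fn      # positional and named access agree
    assert fns[1] is fns.aggregate_fn   # but names are the safer spelling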
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
@@ -14,7 +14,8 @@
 # ==============================================================================
 from enum import Enum
 from functools import partial
-from typing import List
+from typing import List, Optional
+from copy import deepcopy
 
 import numpy as np
 
@@ -25,6 +26,8 @@ from model_compression_toolkit.core.common.framework_implementation import Frame
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
+from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
+from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, Cut
 from model_compression_toolkit.logger import Logger
 
@@ -87,10 +90,91 @@ def weights_size_utilization(mp_cfg: List[int],
     return np.array(weights_memory)
 
 
+def calc_graph_cuts(graph: Graph) -> List[Cut]:
+    """
+    Calculate graph activation cuts.
+    Args:
+        graph: A graph object to calculate activation cuts on.
+
+    Returns:
+        A list of activation cuts.
+
+    """
+    memory_graph = MemoryGraph(deepcopy(graph))
+    _, _, cuts = compute_graph_max_cut(memory_graph)
+
+    if cuts is None:
+        Logger.critical("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
+    # filter empty cuts and cuts that contain only nodes with activation quantization disabled.
+    filtered_cuts = []
+    for cut in cuts:
+        cut_has_no_act_quant_nodes = any(
+            [graph.find_node_by_name(e.node_name)[0].has_activation_quantization_enabled_candidate()
+             for e in cut.mem_elements.elements])
+        if len(cut.mem_elements.elements) > 0 and cut_has_no_act_quant_nodes:
+            filtered_cuts.append(cut)
+    return filtered_cuts
+
+
+def activation_maxcut_size_utilization(mp_cfg: List[int],
+                                       graph: Graph,
+                                       fw_info: FrameworkInfo,
+                                       fw_impl: FrameworkImplementation,
+                                       cuts: Optional[List[Cut]] = None) -> np.ndarray:
+    """
+    Computes a resource utilization vector with the respective output memory max-cut size for activation
+    nodes, according to the given mixed-precision configuration.
+
+    Args:
+        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
+        graph: Graph object.
+        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
+            (not used in this method).
+        fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
+        cuts: a list of graph cuts (optional. if not provided calculated locally).
+            TODO maxcut: refactor - need to remove the cuts so all metric functions signatures are the same.
+
+    Returns: A vector of node's cut memory sizes.
+    Note that the vector is not necessarily of the same length as the given config.
+
+    """
+    if len(mp_cfg) == 0:
+        # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
+        # configurable nodes.
+        return np.array([])
+
+    activation_cut_memory = []
+    mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
+    # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
+    nodes_act_nbits = {}
+    for n in graph.get_sorted_activation_configurable_nodes():
+        node_idx = mp_nodes.index(n.name)
+        node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
+        node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
+        nodes_act_nbits[n.name] = node_nbits
+
+    if cuts is None:
+        cuts = calc_graph_cuts(graph)
+
+    for i, cut in enumerate(cuts):
+        mem_elements = [m.node_name for m in cut.mem_elements.elements]
+        mem = 0
+        for op_name in mem_elements:
+            n = graph.find_node_by_name(op_name)[0]
+            if n.is_activation_quantization_enabled():
+                base_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
+                mem += _compute_node_activation_memory(n, nodes_act_nbits.get(op_name, base_nbits))
+
+        activation_cut_memory.append(mem)
+
+    return np.array(activation_cut_memory)
+
+
+# TODO maxcut: add test for this function and remove no cover
 def activation_output_size_utilization(mp_cfg: List[int],
                                        graph: Graph,
                                        fw_info: FrameworkInfo,
-                                       fw_impl: FrameworkImplementation) -> np.ndarray:
+                                       fw_impl: FrameworkImplementation) -> np.ndarray:  # pragma: no cover
     """
     Computes a resource utilization vector with the respective output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
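Note: the key difference from the old per-tensor metric it supersedes: activation_output_size_utilization returns one size per tensor and is aggregated with MAX, while the max-cut metric sums every tensor that is simultaneously alive within a cut, so peak activation memory is no longer underestimated when several activations coexist. A tiny illustration with made-up sizes:

    # Two 100-param tensors alive at the same time (e.g. a residual branch):
    tensor_sizes = [100, 100]
    per_tensor_max = max(tensor_sizes)   # old estimate: 100
    cut_size = sum(tensor_sizes)         # max-cut estimate: 200, the real peak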
@@ -424,6 +508,8 @@ class MpRuMetric(Enum):
 
         WEIGHTS_SIZE - applies the weights_size_utilization function
 
+        ACTIVATION_MAXCUT_SIZE - applies the activation_maxcut_size_utilization function.
+
         ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
 
         TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
@@ -433,6 +519,7 @@ class MpRuMetric(Enum):
     """
 
     WEIGHTS_SIZE = partial(weights_size_utilization)
+    ACTIVATION_MAXCUT_SIZE = partial(activation_maxcut_size_utilization)
     ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
     TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
     BOPS_COUNT = partial(bops_utilization)
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
@@ -27,7 +27,7 @@ SOLVER_TIME_LIMIT = 60
 
 
 def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
-                                  target_resource_utilization: ResourceUtilization = None) -> List[int]:
+                                  target_resource_utilization: ResourceUtilization = None) -> np.ndarray:
     """
     Searching and returning a mixed-precision configuration using an ILP optimization solution.
     It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
@@ -44,7 +44,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
         consumption).
 
     Returns:
-        The mixed-precision configuration (list of indices. Each indicates the bitwidth index of a node).
+        The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node).
 
     """
 
model_compression_toolkit/core/keras/data_util.py
@@ -58,6 +58,7 @@ def flat_gen_fn(data_gen_fn: Callable[[], Generator]):
 
     return gen
 
+
 class TFDatasetFromGenerator:
     """
     TensorFlow dataset from a data generator function, batched to a specified size.
@@ -70,7 +71,7 @@ class TFDatasetFromGenerator:
         """
         inputs = next(data_gen_fn())
         if not isinstance(inputs, list):
-            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')  # pragma: no cover
         self.orig_batch_size = inputs[0].shape[0]
         self._size = None
 
@@ -78,7 +79,6 @@ class TFDatasetFromGenerator:
         output_signature = get_tensor_spec(inputs, ignore_batch_dim=True)
         self.dataset = tf.data.Dataset.from_generator(flat_gen_fn(data_gen_fn), output_signature=output_signature)
 
-
     def __iter__(self):
         return iter(self.dataset)
 
@@ -89,7 +89,6 @@ class TFDatasetFromGenerator:
         return self._size
 
 
-
 class FixedTFDataset:
     """
     Fixed dataset containing samples from a generator, stored in memory.
@@ -103,7 +102,7 @@ class FixedTFDataset:
         """
         inputs = next(data_gen_fn())
         if not isinstance(inputs, list):
-            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')  # pragma: no cover
        self.orig_batch_size = inputs[0].shape[0]
 
         samples = []
@@ -131,7 +130,7 @@ class FixedSampleInfoDataset:
 
     def __init__(self, samples: Sequence, sample_info: Sequence):
         if not all(len(info) == len(samples) for info in sample_info):
-            raise ValueError('Sample and additional info lengths must match')
+            raise ValueError('Sample and additional info lengths must match')  # pragma: no cover
         self.samples = samples
         self.sample_info = sample_info
 
model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py
@@ -20,7 +20,7 @@ from packaging import version
 if version.parse(tf.__version__) >= version.parse("2.13"):
     from keras.src.layers.core import TFOpLambda
     from keras.src.layers import Conv2D, DepthwiseConv2D
-else:
+else:  # pragma: no cover
     from keras.layers.core import TFOpLambda
     from keras.layers import Conv2D, DepthwiseConv2D
 from model_compression_toolkit.logger import Logger
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py
@@ -68,8 +68,8 @@ class ScaledDotProductDecomposition(BaseSubstitution):
         output_shape[-2], output_shape[-1] = input_shape[-1], input_shape[-2]
         transpose_node = FunctionalNode(name=f"{attention_node_name}_{key_node.name}_transpose",
                                         framework_attr={},
-                                        input_shape=input_shape,
-                                        output_shape=output_shape,
+                                        input_shape=[input_shape],
+                                        output_shape=[output_shape],
                                         weights={},
                                         layer_class=torch.transpose,
                                         op_call_args=[-1, -2],  # axes to transpose
@@ -99,7 +99,7 @@ class ScaledDotProductDecomposition(BaseSubstitution):
     def _get_matmul_node(self, attention_node_name: str, q_node: BaseNode, transposed_k_node: BaseNode) -> BaseNode:
         matmul1_output_shape = copy(q_node.output_shape[0])
         matmul1_output_shape[-2] = q_node.output_shape[0][-2]
-        matmul1_output_shape[-1] = transposed_k_node.output_shape[-1]
+        matmul1_output_shape[-1] = transposed_k_node.output_shape[0][-1]
         matmul_name = f'{attention_node_name}_matmul1'
         return FunctionalNode(name=matmul_name,
                               framework_attr={},
model_compression_toolkit/core/pytorch/pytorch_implementation.py
@@ -20,7 +20,7 @@ from typing import List, Any, Tuple, Callable, Type, Dict, Generator
 import numpy as np
 import torch
 from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
-from torch import sigmoid, softmax, add, cat, argmax
+from torch import sigmoid, softmax, add, cat, argmax, concat, concatenate
 from torch.nn import Conv2d, ConvTranspose2d, Linear
 from torch.nn import Module, Sigmoid, Softmax
 
@@ -428,7 +428,8 @@ class PytorchImplementation(FrameworkImplementation):
         """
 
         return any(node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
-                                                           softmax, operator.add, add, cat, operator.concat])
+                                                           softmax, operator.add, add, cat, concat, concatenate,
+                                                           operator.concat])
 
     def get_mp_node_distance_fn(self, n: BaseNode,
                                 compute_distance_fn: Callable = None,
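Note: torch.concat and torch.concatenate are NumPy-style aliases of torch.cat, but a traced model records whichever spelling was called as the node's type, so the match list has to enumerate all three. A quick equivalence check (assumes a torch version recent enough to expose both aliases, which MCT's own import above requires):

    import torch

    x = torch.ones(2, 2)
    # All three spellings compute the same op...
    assert torch.equal(torch.concat([x, x]), torch.cat([x, x]))
    assert torch.equal(torch.concatenate([x, x]), torch.cat([x, x]))
    # ...but node-type matching sees the specific alias that was called,
    # hence cat, concat and concatenate are all listed explicitly.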
model_compression_toolkit/core/pytorch/reader/graph_builders.py
@@ -110,7 +110,7 @@ def _extract_torch_layer_data(node_module: torch.nn.Module) -> Tuple[Any, Dict[s
     """
     node_type = type(node_module)
     if not isinstance(node_module, torch.nn.Module):
-        Logger.error(f"Expected an instance of torch.nn.Module for node {node_module.name}, but got {node_type}")
+        Logger.error(f"Expected an instance of torch.nn.Module for node {node_module.name}, but got {node_type}")  # pragma: no cover
     # Extract the instance framework_attr (i.e. the arguments the class instance was initialized with). "fullargspec"
     # is a list of the layer's attribute names, that will be used as keys of the framework_attr dictionary. We the
     # values from the layer instance.
@@ -147,12 +147,14 @@ def _extract_input_and_output_shapes(_node: Node) -> Tuple[List, List]:
 
     if _node.meta[TYPE] == torch.Tensor:
         output_shape = [list(_node.meta[TENSOR_META].shape)]
+    elif _node.meta[TYPE] == torch.Size:
+        output_shape = [[len(input_shape[0])]] if len(input_shape) > 0 else [[]]
     elif _node.meta[TYPE] in (list, tuple):
         output_shape = [list(m.shape) for m in _node.meta.get(TENSOR_META, [])]
-    elif _node.meta[TYPE] == int:
+    elif _node.meta[TYPE] in [int, bool]:
         output_shape = [[1]]
     else:
-        output_shape = []
+        output_shape = [[]]
 
     return input_shape, output_shape
 
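Note: this function reads the `meta` dict that torch.fx shape propagation populates, where (assuming MCT's TYPE and TENSOR_META constants map to fx's 'type' and 'tensor_meta' keys) `meta['type']` holds the Python type of the node's runtime output, now also covering torch.Size and bool. A hedged sketch of how those fields get filled:

    import torch
    from torch.fx import symbolic_trace
    from torch.fx.passes.shape_prop import ShapeProp

    class M(torch.nn.Module):
        def forward(self, x):
            return x.size()  # runtime output is a torch.Size, not a Tensor

    gm = symbolic_trace(M())
    ShapeProp(gm).propagate(torch.ones(2, 3))  # runs the graph, records meta
    for node in gm.graph.nodes:
        print(node.name, node.meta.get('type'))  # size node reports torch.Size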
@@ -219,16 +221,16 @@ def nodes_builder(model: GraphModule,
             elif hasattr(torch.Tensor, node.target):
                 node_type = getattr(torch.Tensor, node.target)
             else:
-                Logger.critical(f"The call method '{node.target}' in {node} is not supported.")
+                Logger.critical(f"The call method '{node.target}' in {node} is not supported.")  # pragma: no cover
 
         elif node.op == GET_ATTR:
             # Node holding a constant -> add to consts_dict so can add them later to weights of next node.
             if node.target in consts_dict:
-                Logger.critical('A constant weight appears to have been recorded multiple times.')
+                Logger.critical('A constant weight appears to have been recorded multiple times.')  # pragma: no cover
             consts_dict[node] = model_parameters_and_buffers[node.target]
             continue
         else:
-            Logger.critical(f'Encountered an unsupported node type in node: {node.name}.')
+            Logger.critical(f'Encountered an unsupported node type in node: {node.name}.')  # pragma: no cover
 
     # Add constants to weights dictionary.
     if node.op != PLACEHOLDER: