mct-nightly 2.2.0.20241224.532__py3-none-any.whl → 2.2.0.20241230.534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/RECORD +21 -21
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/fusion/graph_fuser.py +8 -7
- model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +6 -6
- model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py +14 -8
- model_compression_toolkit/core/common/graph/memory_graph/memory_element.py +6 -1
- model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py +14 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +47 -13
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +55 -8
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +89 -2
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +2 -2
- model_compression_toolkit/core/keras/data_util.py +4 -5
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py +3 -3
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +3 -2
- model_compression_toolkit/core/pytorch/reader/graph_builders.py +8 -6
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=
+model_compression_toolkit/__init__.py,sha256=iPjPjwwH50JpoFzcLJDS6XEjBVsJQmsuUenuZxUXTPg,1573
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -29,7 +29,7 @@ model_compression_toolkit/core/common/collectors/mean_collector.py,sha256=mjr3U_
 model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py,sha256=5oKsJEKdVmj4C7fKdHhmrFN5k4G2BaFETpmf_xKNs7s,5207
 model_compression_toolkit/core/common/collectors/statistics_collector.py,sha256=vcf7Pk1v09SJC4fbAWf_8AgTktE6tPizJbQpSmocP2U,7930
 model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=
+model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=b41_4rL_Adiza4vpWlmmqgvkpUmWVdfdx0nEIB0p2n8,6195
 model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
 model_compression_toolkit/core/common/graph/base_graph.py,sha256=6jlwj4WDT3XJKf1QQ4Aougswhl-Xx51QzV58CePbjVg,37818
@@ -41,11 +41,11 @@ model_compression_toolkit/core/common/graph/graph_searches.py,sha256=2oKuW6L8hP-
 model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py,sha256=3el-A7j1oyoo1_9zq3faQp7IeRsFXFCvnrb3zZFXpU0,9803
 model_compression_toolkit/core/common/graph/memory_graph/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py,sha256=X6FK3C3y8ixFRPjC_wm3ClloCX8_06SOdA1TRi7o_LA,3800
-model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256
+model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256=7KbAQ21mToemAjmpsUKknRzoljOaQ62VTxUSsLTSskU,2877
 model_compression_toolkit/core/common/graph/memory_graph/cut.py,sha256=aPdXJPP5a5Rnu5Z5XqTZZkuGtdgHVu0RmX_NOfNM6Tc,2470
-model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=
-model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=
-model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=
+model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=gOVnHt0JaZvyuA0T1oTFSR59cwFO46E5OOC53h19nvg,17839
+model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=ISD2BvJWj5mB91jrFjG8VQb0oOoLBoita_thCZWzCPI,4238
+model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=3OC8kMXuzBv-R7wWmKY-i1AQNAr5x3LBZ4aj7hHF-cQ,7791
 model_compression_toolkit/core/common/hessian/__init__.py,sha256=E7LK3K_1AwMCQokanNc1JODMwUKNOKmwXQiGQ7GO10I,1033
 model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=YynbVHdHH2gPlk1QHXH6GygIkXRZ9qxR14cpgKrHPT0,13238
 model_compression_toolkit/core/common/hessian/hessian_info_utils.py,sha256=1axmN0tjJSo_7hUr2d2KMv4y1pBi19cqWSQpi4BbdsA,1458
@@ -66,18 +66,18 @@ model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha2
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=15PbLAfuIyQInFczPka_MuyO4AJzAaOm9bOi3bzllxc,4531
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=B7xLl8P5eCz0_fBxocDlNiv6k-3MdfMUk2GjYKl2p5k,7522
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=UGoIp5Fb8WwZpBSrEr_KO7SRdhSg5XuZq2ZzjL1ILuc,39296
 model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=adjuvrJcan7Ua3nYlJX7T6qGkCRHGqWMaM5-099a9Us,27220
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=cjmHFU4peJ6qYP8lsIkYYSLvRddDbiSQ6mPZnZy0p6U,7905
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=MtPkZfPIJWI191Hbjp6JluUyLnqiJRi3zNf-CqVNuag,5053
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=zVdOL80tbVAGUBT-JzeyBNGXASmutJTCTW0G6AQz7WY,17319
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py,sha256=PmuVXCKgwRNvG7pLGdA24Ren1lFH5hW51_FrOmUVHwU,4199
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=Z-cFOGUysk33OQgxZrmqn6dvMorR4m3xTgxjuLkplbs,1850
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=wctoYZE2vskmlPKvGMZ3UVBhyKQVP362crh_k0D_Bx0,25538
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=c_msFU7zoBpkcT9_-P-OLyPTDd9hZEdFjFUQ0Y9pLaY,16574
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -155,7 +155,7 @@ model_compression_toolkit/core/common/visualization/tensorboard_writer.py,sha256
 model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiOcnLAq1v0MMBCJqMJzzFk,3225
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
-model_compression_toolkit/core/keras/data_util.py,sha256=
+model_compression_toolkit/core/keras/data_util.py,sha256=HQj3-GP5oT5JHpYt80mtKhZjTCvKYs6c3Ll0txEgKHQ,6892
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=PYcER89eEXjKtR0T7-2Y4f7cckqoD5OQbpHePoRkMec,5030
 model_compression_toolkit/core/keras/keras_implementation.py,sha256=HwbIR7x4t-TBNbWHVvVNFk8z-KFt6zM0LWAUXQuNZrk,31753
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
@@ -176,7 +176,7 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_reconstruction.py,sha256=GR1a3mCZpNUu4WxixJXF_aSm57phAdxaRoHecNx3hxw,3168
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_refusing.py,sha256=5df_xGfXkqNub4xVRnCWQvSohWqdv12axjJ6edVU2H0,2478
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/concat_threshold_update.py,sha256=Hl4LEQ_bw_Vpmf3ZqHujYUqVdvTNsPlEMvr9dZhwg2U,2806
-model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py,sha256=
+model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py,sha256=YHEh3rtTD61doT_oz8Tw7fg5AKKHxXvbpW_GmVbSUVw,11651
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py,sha256=R3U7cjc2E0zheMem16GHygp5jZFGSaomkNOTxTjcAgw,5794
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py,sha256=V6hp67CkS_A3WqdsjLjs0ETtdZAOo4P9mhy4aT7W5FE,5940
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py,sha256=AvquvVVVT8-ioeVn-gjqysK4L41L3I7TlNOEDfWjViY,8185
@@ -223,7 +223,7 @@ model_compression_toolkit/core/pytorch/constants.py,sha256=YwD_joIF0vK8UG2vW1NVv
 model_compression_toolkit/core/pytorch/data_util.py,sha256=YYbT135HhlTt0q6XdD2JX7AS_L92f_uV2rWq2hsJOCA,6325
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-Vls1P_8Ckm_18nnOsmQkZ71SmzHwtQLbQ383Z4Rb-U,4365
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=Mfdq15JLKWAAkpnOt_urcPOVXhqONTvzORyLOG-_Klo,29659
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=xpKj99OZKT9NT0vKIl_cOe8d89d2gef1gKoNT6PFElE,4989
 model_compression_toolkit/core/pytorch/utils.py,sha256=7VbgcLwtQvdEEc_AJgSOQ3U3KRKCICFPaBirN1fIQxg,3940
@@ -254,7 +254,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py,sha256=hAZXzrEinHa-dJHLj39Hy_9Q-13QyO95rtYVSLrhvT8,4915
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py,sha256=DcJEIkGvBdIMOelNIwaJUZ5UsAHiGnDJPR20I464vWo,2929
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py,sha256=XFtU9yuBmoZlX0f0mS6otMPWMk-RcWs94XdvvTNhW8Y,3303
-model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=SBtIuxb1Q2oUMJKSrAyN2wuaY4k1tsKt7qql0dP_PE0,12473
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py,sha256=3WCLvPyx7tVkM0rwYhYq-gntCzW9R_DcImR1ucKlPac,10772
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/softmax_shift.py,sha256=05lV4pIL3hJkZl4JQPV4wk_EFD0eYLG5b8cdzvZk4P8,1588
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/transform_function_call_method.py,sha256=EC9Dvp-_UlpDWnipnf8ds65wh_Y-T8pXAFIwRScWpiY,2044
@@ -273,7 +273,7 @@ model_compression_toolkit/core/pytorch/quantizer/__init__.py,sha256=Rf1RcYmelmdZ
 model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py,sha256=D8_CEuFqKAhbUgKaRw7Jlxo0zlqgPTMu6CIIIM4LfS0,7045
 model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py,sha256=uyeBtNokyDUikk-YkDP_mN_2DX0J5oPm3kSfdSUT2Ck,4420
 model_compression_toolkit/core/pytorch/reader/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=
+model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=RBNhPuz02kstVVIDibHUES_Skn9feg3gOGbQylM8h-A,19547
 model_compression_toolkit/core/pytorch/reader/node_holders.py,sha256=7XNc7-l1MZPJGcOESvtAwfIMxrU6kvt3YjF5B7qOqK4,1048
 model_compression_toolkit/core/pytorch/reader/reader.py,sha256=GEJE0QX8XJFWbYCkbRBtzttZtmmuoACLx8gw9KyAQCE,6015
 model_compression_toolkit/core/pytorch/statistics_correction/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
@@ -560,8 +560,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
+mct_nightly-2.2.0.20241230.534.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20241230.534.dist-info/METADATA,sha256=yyeiq5zHdWmEdPcLFzs1V6GQR86hrjTPm05Nbj-rGCs,26453
+mct_nightly-2.2.0.20241230.534.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+mct_nightly-2.2.0.20241230.534.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.2.0.20241230.534.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.2.0.
+__version__ = "2.2.0.20241230.000534"
model_compression_toolkit/core/common/fusion/graph_fuser.py
CHANGED
@@ -36,10 +36,10 @@ class GraphFuser:
         The fusion process involves:
             1. Creating new fused nodes to represent these groups.
             2. Updating the graph structure to replace the original nodes with fused nodes.
-            3. Maintaining mapping
+            3. Maintaining mapping of original node names to their fused node names.
 
         Args:
-            graph: Graph to
+            graph: Graph to fuse its nodes.
 
         Returns:
             Mapping of original node names to their fused node names
@@ -54,7 +54,8 @@ class GraphFuser:
             fused_nodes_mapping[node.name] = new_fused_node.name
         return fused_nodes_mapping
 
-
+    @staticmethod
+    def _create_fused_node(nodes: List[BaseNode]) -> BaseNode:
         """
         Create a new node that represents the fusion of the given nodes.
 
@@ -79,10 +80,10 @@ class GraphFuser:
 
         return fused_node
 
-
-
-
-
+    @staticmethod
+    def _replace_nodes_with_fused_node(graph: Graph,
+                                       nodes_to_fuse: List[BaseNode],
+                                       fused_node: BaseNode):
         """
         Replace the specified nodes in the graph with a new fused node.
 
model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
CHANGED
@@ -51,13 +51,13 @@ def compute_graph_max_cut(memory_graph: MemoryGraph,
         estimate = (u_bound + l_bound) / 2
         schedule, max_cut_size, cuts = max_cut_astar.solve(estimate_factor=estimate, iter_limit=astar_n_iter)
         if schedule is None:
-
+            l_bound = estimate
+        else:
+            u_bound = min(estimate, max_cut_size)
+            last_result = (schedule, max_cut_size, cuts)
 
-
-
-
-        if l_bound * (1 + eps) >= next_u_bound:
-            return last_result
+        if l_bound * (1 + eps) >= u_bound:
+            return last_result
 
         it += 1
 
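For context, the rewritten loop above is a binary search over the cut-size estimate: an infeasible estimate raises the lower bound, a feasible one tightens the upper bound to the achieved max-cut size, and the search stops once the bounds are within the `eps` tolerance. A minimal standalone sketch of that control flow (the `solve` callable, bounds, and tolerance are stand-ins, not the MCT API):

```python
from typing import Callable, Optional, Tuple

SolveResult = Tuple[list, float, list]  # (schedule, max_cut_size, cuts)

def binary_search_max_cut(solve: Callable[[float], Optional[SolveResult]],
                          l_bound: float,
                          u_bound: float,
                          eps: float = 1e-2) -> Optional[SolveResult]:
    """Shrink [l_bound, u_bound] around the smallest feasible cut-size estimate."""
    last_result = None
    while l_bound * (1 + eps) < u_bound:
        estimate = (u_bound + l_bound) / 2
        result = solve(estimate)
        if result is None:
            l_bound = estimate  # infeasible -> raise the lower bound
        else:
            _, max_cut_size, _ = result
            u_bound = min(estimate, max_cut_size)  # feasible -> tighten the upper bound
            last_result = result
    return last_result
```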
model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py
CHANGED
@@ -154,6 +154,9 @@ class MaxCutAstar:
             cut_route = routes[next_cut]
 
             if next_cut == self.target_cut:
+                # TODO maxcut: Why do we filter the cuts (cut_route) but not the max cut size (cut_sost).
+                # This is a mismatch between max_cut and max(cuts).
+                # Also, unfiltered cut_route seems perfect, including input and output tensor sizes of current op.
                 return self._remove_dummys_from_path(cut_route[0].op_order), cut_cost,\
                        list(set([self._remove_dummys_from_cut(self.clean_memory_for_next_step(c)) for c in cut_route]))
 
@@ -178,7 +181,8 @@ class MaxCutAstar:
                 cost = self.accumulate(cut_cost, c.memory_size())
                 if c not in open_list:
                     self._update_expanded_node(c, cost, cut_route, open_list, costs, routes)
-
+                # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+                elif self.ordering(cost, costs[c]):  # pragma: no cover
                     # If we already saw this cut during the search with a larger cost, then we want to update the order
                     # of the schedule in the cut
                     # Remove call - removes the cut with the same memory elements but different ordering from open
@@ -187,7 +191,8 @@ class MaxCutAstar:
                     self._update_expanded_node(c, cost, cut_route, open_list, costs, routes)
 
         # Halt or No Solution
-
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        return None, 0, None  # pragma: no cover
 
     @staticmethod
     def _update_expanded_node(cut: Cut, cost: float, route: List[Cut], open_list: List[Cut],
@@ -223,8 +228,7 @@ class MaxCutAstar:
 
         """
         ordered_cuts_list = sorted(open_list,
-                                   key=lambda c: (self.accumulate(costs[c], self.estimate(c, estimate_factor)), len(routes[c]))
-                                   reverse=False)
+                                   key=lambda c: (self.accumulate(costs[c], self.estimate(c, estimate_factor)), -len(routes[c])))
 
         assert len(ordered_cuts_list) > 0
         return ordered_cuts_list[0]
@@ -349,7 +353,8 @@ class MaxCutAstar:
         Returns: True if the first cost is smaller than the second one, else otherwise.
 
         """
-
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        return cost_1 < cost_2  # pragma: no cover
 
     def estimate(self, cut: Cut, estimate_factor: float) -> float:
         """
@@ -377,9 +382,10 @@ class MaxCutAstar:
         Returns: An initial estimate value.
 
         """
-
-
-
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        l_bound = memory_graph.memory_lbound_single_op  # pragma: no cover
+        u_bound = 2 * sum([t.total_size for t in memory_graph.b_nodes]) - l_bound  # pragma: no cover
+        return (u_bound + l_bound) / 2  # pragma: no cover
 
     @staticmethod
    def _remove_dummys_from_path(path: List[BaseNode]) -> List[BaseNode]:
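The sorted-key change above folds the removed `reverse=False` argument into the key itself: ordering stays ascending, but the route-length tie-breaker is negated, so among equal-cost cuts the one with the longest route is now expanded first. A toy illustration of that tie-breaking difference (hypothetical data):

```python
# (name, cost, route_len) triples standing in for cuts, their costs and route lengths.
candidates = [("a", 10.0, 2), ("b", 10.0, 5), ("c", 12.0, 1)]

old_first = min(candidates, key=lambda c: (c[1], c[2]))   # ties prefer the shortest route -> "a"
new_first = min(candidates, key=lambda c: (c[1], -c[2]))  # ties prefer the longest route -> "b"
assert old_first[0] == "a" and new_first[0] == "b"
```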
model_compression_toolkit/core/common/graph/memory_graph/memory_element.py
CHANGED
@@ -30,7 +30,12 @@ class ActivationMemoryTensor:
             init_size_to_zero: Whether to initialize the memory tensor size to 0 or not.
         """
 
-
+        # remove batch size (first element) from output shape. If the shape is a list then remove the first
+        # axis. If shape a vector (e.g. output of size) then set the shape minus 1 to ignore the batch value.
+        if len(shape) == 1:
+            self.shape = [] if shape[0] is None else [shape[0] - 1]
+        else:
+            self.shape = shape[1:]
         # The total size of a tensor is considered to be the number of elements in the tensor
         self.total_size = self._get_tensor_total_size() if not init_size_to_zero else 0
 
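The constructor change above normalizes the incoming output shape before the element count is computed. A standalone rendering of that rule, under the assumption stated in the code comment (the first element always carries the batch value, and `None` means a dynamic batch):

```python
from typing import List, Optional

def strip_batch(shape: List[Optional[int]]) -> list:
    if len(shape) == 1:
        # 1-D "shape" (e.g. the output of a size op): drop the batch value from the count.
        return [] if shape[0] is None else [shape[0] - 1]
    return shape[1:]  # multi-axis shape: drop the leading batch axis

assert strip_batch([None, 32, 32, 3]) == [32, 32, 3]
assert strip_batch([None]) == []
assert strip_batch([8]) == [7]
```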
model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from typing import List
+from operator import getitem
 
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.graph.edge import EDGE_SOURCE_INDEX
@@ -45,7 +46,8 @@ class MemoryGraph(DirectedBipartiteGraph):
         tensor_to_node = []
 
         for n in nodes:
-            n_outputs =
+            n_outputs = n.output_shape if isinstance(n.output_shape[0], (tuple, list)) else [n.output_shape]
+
             out_edges = model_graph.out_edges(n, sort_by_attr=EDGE_SOURCE_INDEX)
 
             for i, ot in enumerate(n_outputs):
@@ -54,7 +56,16 @@ class MemoryGraph(DirectedBipartiteGraph):
                 # Add memory tensor as current node's output
                 node_to_tensor.append((n, memory_tensor))
 
-
+                # TODO maxcut: refactor this code. it handles split->getitem generated by fx.
+                ot_edges = []
+                for oe in out_edges:
+                    if oe.sink_node.type is getitem and len(oe.sink_node.op_call_args) == 1 and isinstance(oe.sink_node.op_call_args[0], int):
+                        source_index = oe.sink_node.op_call_args[0]
+                    else:
+                        source_index = oe.source_index
+                    if source_index == i:
+                        ot_edges.append(oe)
+
                 for oe in ot_edges:
                     # Add current memory tensor as input to current node's successors
                     tensor_to_node.append((memory_tensor, oe.sink_node))
@@ -71,6 +82,7 @@ class MemoryGraph(DirectedBipartiteGraph):
         inputs_tensors_memory = [sum([t.total_size for t in self.operation_node_children(n)])
                                  for n in nodes if n in model_graph.get_inputs()]
 
+        # TODO maxcut: why both inputs and outputs of each nodes, while the A* solves for node outputs only???
         nodes_total_memory = [sum([t.total_size for t in self.operation_node_children(n)] +
                                   [t.total_size for t in self.operation_node_parents(n)])
                               for n in nodes if n not in model_graph.get_inputs()]
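The new edge-routing block above exists because torch.fx lowers `split` followed by indexing into `getitem` nodes whose single integer argument names the producer's output slot, while ordinary edges carry that slot in `source_index`. A simplified, self-contained sketch of the selection logic (the dataclasses are stand-ins for MCT's node and edge objects):

```python
from dataclasses import dataclass
from operator import getitem
from typing import Any, List

@dataclass
class FakeNode:          # stand-in for a graph node
    type: Any
    op_call_args: list

@dataclass
class FakeEdge:          # stand-in for a graph edge
    sink_node: FakeNode
    source_index: int

def edges_for_output(out_edges: List[FakeEdge], i: int) -> List[FakeEdge]:
    selected = []
    for oe in out_edges:
        if oe.sink_node.type is getitem and len(oe.sink_node.op_call_args) == 1 \
                and isinstance(oe.sink_node.op_call_args[0], int):
            # getitem sink: its single int argument is the producer's output slot
            source_index = oe.sink_node.op_call_args[0]
        else:
            source_index = oe.source_index
        if source_index == i:
            selected.append(oe)
    return selected

# Two getitem consumers of a 2-output split node:
e0 = FakeEdge(FakeNode(getitem, [0]), source_index=0)
e1 = FakeEdge(FakeNode(getitem, [1]), source_index=0)  # fx leaves source_index at 0
assert edges_for_output([e0, e1], 1) == [e1]
```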
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
CHANGED
@@ -24,8 +24,10 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import RuFunctions
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric, calc_graph_cuts
+from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import Cut
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 
@@ -40,7 +42,7 @@ class MixedPrecisionSearchManager:
                  fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
-                 ru_functions: Dict[RUTarget,
+                 ru_functions: Dict[RUTarget, RuFunctions],
                  target_resource_utilization: ResourceUtilization,
                  original_graph: Graph = None):
         """
@@ -65,8 +67,11 @@ class MixedPrecisionSearchManager:
         self.sensitivity_evaluator = sensitivity_evaluator
         self.layer_to_bitwidth_mapping = self.get_search_space()
         self.compute_metric_fn = self.get_sensitivity_metric()
+        self._cuts = None
 
-
+        ru_types = [ru_target for ru_target, ru_value in
+                    target_resource_utilization.get_resource_utilization_dict().items() if ru_value < np.inf]
+        self.compute_ru_functions = {ru_target: ru_fn for ru_target, ru_fn in ru_functions.items() if ru_target in ru_types}
         self.target_resource_utilization = target_resource_utilization
         self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
@@ -76,6 +81,17 @@ class MixedPrecisionSearchManager:
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
                                                                        original_graph=self.original_graph)
 
+    @property
+    def cuts(self) -> List[Cut]:
+        """
+        Calculates graph cuts. Written as property, so it will only be calculated once and
+        only if cuts are needed.
+
+        """
+        if self._cuts is None:
+            self._cuts = calc_graph_cuts(self.original_graph)
+        return self._cuts
+
     def get_search_space(self) -> Dict[int, List[int]]:
         """
         The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces
@@ -106,6 +122,21 @@ class MixedPrecisionSearchManager:
 
         return self.sensitivity_evaluator.compute_metric
 
+    def _calc_ru_fn(self, ru_target, ru_fn, mp_cfg) -> np.ndarray:
+        """
+        Computes a resource utilization for a certain mixed precision configuration.
+        The method computes a resource utilization vector for specific target resource utilization.
+
+        Returns: resource utilization value.
+
+        """
+        # ru_fn is a pair of resource utilization computation method and
+        # resource utilization aggregation method (in this method we only need the first one)
+        if ru_target is RUTarget.ACTIVATION:
+            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl, self.cuts)
+        else:
+            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl)
+
     def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
         """
         Computes a resource utilization vector with the values matching to the minimal mp configuration
@@ -118,10 +149,10 @@ class MixedPrecisionSearchManager:
 
         """
         min_ru = {}
-        for ru_target,
-            # ru_fns is a pair of resource utilization computation method and
+        for ru_target, ru_fn in self.compute_ru_functions.items():
+            # ru_fns is a pair of resource utilization computation method and
             # resource utilization aggregation method (in this method we only need the first one)
-            min_ru[ru_target] =
+            min_ru[ru_target] = self._calc_ru_fn(ru_target, ru_fn, self.min_ru_config)
 
         return min_ru
 
@@ -212,7 +243,7 @@ class MixedPrecisionSearchManager:
 
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        return self.
+        return self._calc_ru_fn(target, self.compute_ru_functions[target], cfg)
 
     @staticmethod
     def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -241,13 +272,15 @@ class MixedPrecisionSearchManager:
         """
 
         non_conf_ru_dict = {}
-        for target,
+        for target, ru_fns in self.compute_ru_functions.items():
            # Call for the ru method of the given target - empty quantization configuration list is passed since we
            # compute for non-configurable nodes
            if target == RUTarget.BOPS:
                ru_vector = None
+            elif target == RUTarget.ACTIVATION:
+                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl, self.cuts)
            else:
-                ru_vector =
+                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl)
 
            non_conf_ru_dict[target] = ru_vector
 
@@ -266,14 +299,15 @@ class MixedPrecisionSearchManager:
         """
 
         ru_dict = {}
-
        for ru_target, ru_fns in self.compute_ru_functions.items():
            # Passing False to ru methods and aggregations to indicates that the computations
            # are not for constraints setting
            if ru_target == RUTarget.BOPS:
-                configurable_nodes_ru_vector = ru_fns
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl, False)
+            elif ru_target == RUTarget.ACTIVATION:
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.graph, self.fw_info, self.fw_impl, self.cuts)
            else:
-                configurable_nodes_ru_vector = ru_fns
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl)
            non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
            if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
                ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
@@ -647,7 +681,7 @@ class ConfigReconstructionHelper:
                 # It's ok, need to find the node's configuration
                 self.retrieve_weights_activation_config(activation_node, weights_node, virtual_node, virtual_cfg_idx, virtual_mp_cfg)
             else:
-                Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{
+                Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{weights_node.name}' to have multiple outputs when not composed with an activation node.")  # pragma: no cover
 
     def update_config_at_original_idx(self, n: BaseNode, origin_cfg_idx: int):
         """
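The `cuts` property added above is a plain lazy-caching accessor: the expensive max-cut computation runs at most once, and only when some resource-utilization metric actually asks for the cuts. A minimal sketch of the pattern (`compute_cuts` stands in for `calc_graph_cuts`); on Python 3.8+, `functools.cached_property` provides equivalent behavior:

```python
class LazyCuts:
    def __init__(self, graph, compute_cuts):
        self._graph = graph
        self._compute_cuts = compute_cuts
        self._cuts = None  # not computed yet

    @property
    def cuts(self):
        # Computed at most once, and only if actually requested.
        if self._cuts is None:
            self._cuts = self._compute_cuts(self._graph)
        return self._cuts
```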
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
CHANGED
@@ -13,10 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 import copy
+from collections import defaultdict
 
 import numpy as np
 from typing import Callable, Any, Dict, Tuple
 
+from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import FLOAT_BITWIDTH, BITS_TO_BYTES
 from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph
@@ -25,6 +27,7 @@ from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import calc_graph_cuts
 
 
 def compute_resource_utilization_data(in_model: Any,
@@ -76,7 +79,7 @@ def compute_resource_utilization_data(in_model: Any,
     total_weights_params = 0 if len(weights_params) == 0 else sum(weights_params)
 
     # Compute max activation tensor
-    activation_output_sizes_bytes, activation_output_sizes =
+    activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_maxcut_sizes(graph=transformed_graph)
     max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
 
     # Compute total memory utilization - parameters sum + max activation tensor
@@ -132,7 +135,52 @@ def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> Tuple[
 
     return np.array(weights_memory_bytes), np.array(weights_params)
 
-
+
+def compute_activation_output_maxcut_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Computes an array of the respective output tensor maxcut size and an array of the output tensor
+    cut size in bytes for each cut.
+
+    Args:
+        graph: A finalized Graph object, representing the model structure.
+
+    Returns:
+        A tuple containing two arrays:
+            - The first is an array of the size of each activation max-cut size in bytes, calculated
+              using the maximal bit-width for quantization.
+            - The second array an array of the size of each activation max-cut activation size in number of parameters.
+
+    """
+    cuts = calc_graph_cuts(graph)
+
+    # map nodes to cuts.
+    node_to_cat_mapping = defaultdict(list)
+    for i, cut in enumerate(cuts):
+        mem_element_names = [m.node_name for m in cut.mem_elements.elements]
+        for m_name in mem_element_names:
+            if len(graph.find_node_by_name(m_name)) > 0:
+                node_to_cat_mapping[m_name].append(i)
+            else:
+                Logger.critical(f"Missing node: {m_name}")  # pragma: no cover
+
+    activation_outputs = np.zeros(len(cuts))
+    activation_outputs_bytes = np.zeros(len(cuts))
+    for n in graph.nodes:
+        # Go over all nodes that have activation quantization enabled.
+        if n.has_activation_quantization_enabled_candidate():
+            # Fetch maximum bits required for activations quantization.
+            max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
+            node_output_size = n.get_total_output_params()
+            for cut_index in node_to_cat_mapping[n.name]:
+                activation_outputs[cut_index] += node_output_size
+                # Calculate activation size in bytes and append to list
+                activation_outputs_bytes[cut_index] += node_output_size * max_activation_bits / BITS_TO_BYTES
+
+    return activation_outputs_bytes, activation_outputs
+
+
+# TODO maxcut: add test for this function and remove no cover
+def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:  # pragma: no cover
     """
     Computes an array of the respective output tensor size and an array of the output tensor size in bytes for
     each node.
@@ -146,9 +194,7 @@ def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarra
             calculated using the maximal bit-width for quantization.
         - The second array represents the size of each node's activation output tensor size.
 
-
     """
-
     activation_outputs = []
     activation_outputs_bytes = []
     for n in graph.nodes:
@@ -238,16 +284,17 @@ def requires_mixed_precision(in_model: Any,
     total_weights_memory_bytes = 0 if len(weights_memory_by_layer_bytes) == 0 else sum(weights_memory_by_layer_bytes)
 
     # Compute max activation tensor in bytes
-
-
+    activation_memory_estimation_bytes, _ = compute_activation_output_maxcut_sizes(transformed_graph)
+    max_activation_memory_estimation_bytes = 0 if len(activation_memory_estimation_bytes) == 0 \
+        else max(activation_memory_estimation_bytes)
 
     # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
     bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
     bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
 
     is_mixed_precision |= target_resource_utilization.weights_memory < total_weights_memory_bytes
-    is_mixed_precision |= target_resource_utilization.activation_memory <
-    is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes +
+    is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_memory_estimation_bytes
+    is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_memory_estimation_bytes
     is_mixed_precision |= target_resource_utilization.bops < bops_count
     return is_mixed_precision
 
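In `compute_activation_output_maxcut_sizes` above, a node's output parameter count is added to every cut in which its tensor is live, and the byte figure scales by the node's maximal candidate bit-width over `BITS_TO_BYTES` (8). A toy rendition of that per-cut accounting with made-up numbers:

```python
import numpy as np

BITS_TO_BYTES = 8
cuts = {0: ["conv1", "relu1"], 1: ["relu1", "conv2"]}          # cut index -> live tensors
node_params = {"conv1": 100.0, "relu1": 100.0, "conv2": 50.0}  # output sizes in parameters
node_max_bits = {"conv1": 8, "relu1": 8, "conv2": 16}          # max candidate bit-widths

sizes = np.zeros(len(cuts))
sizes_bytes = np.zeros(len(cuts))
for cut_idx, names in cuts.items():
    for name in names:
        sizes[cut_idx] += node_params[name]
        sizes_bytes[cut_idx] += node_params[name] * node_max_bits[name] / BITS_TO_BYTES

# Cut 0: (100*8 + 100*8)/8 = 200 bytes; cut 1: (100*8 + 50*16)/8 = 200 bytes.
assert sizes_bytes.tolist() == [200.0, 200.0]
```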
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py
CHANGED
@@ -28,6 +28,6 @@ class RuFunctions(NamedTuple):
 
 
 ru_functions_mapping = {RUTarget.WEIGHTS: RuFunctions(MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
-                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.
+                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_MAXCUT_SIZE, MpRuAggregation.MAX),
                         RUTarget.TOTAL: RuFunctions(MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
                         RUTarget.BOPS: RuFunctions(MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
CHANGED
@@ -14,7 +14,8 @@
 # ==============================================================================
 from enum import Enum
 from functools import partial
-from typing import List
+from typing import List, Optional
+from copy import deepcopy
 
 import numpy as np
 
@@ -25,6 +26,8 @@ from model_compression_toolkit.core.common.framework_implementation import Frame
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
+from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
+from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, Cut
 from model_compression_toolkit.logger import Logger
 
 
@@ -87,10 +90,91 @@ def weights_size_utilization(mp_cfg: List[int],
     return np.array(weights_memory)
 
 
+def calc_graph_cuts(graph: Graph) -> List[Cut]:
+    """
+    Calculate graph activation cuts.
+    Args:
+        graph: A graph object to calculate activation cuts on.
+
+    Returns:
+        A list of activation cuts.
+
+    """
+    memory_graph = MemoryGraph(deepcopy(graph))
+    _, _, cuts = compute_graph_max_cut(memory_graph)
+
+    if cuts is None:
+        Logger.critical("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
+    # filter empty cuts and cuts that contain only nodes with activation quantization disabled.
+    filtered_cuts = []
+    for cut in cuts:
+        cut_has_no_act_quant_nodes = any(
+            [graph.find_node_by_name(e.node_name)[0].has_activation_quantization_enabled_candidate()
+             for e in cut.mem_elements.elements])
+        if len(cut.mem_elements.elements) > 0 and cut_has_no_act_quant_nodes:
+            filtered_cuts.append(cut)
+    return filtered_cuts
+
+
+def activation_maxcut_size_utilization(mp_cfg: List[int],
+                                       graph: Graph,
+                                       fw_info: FrameworkInfo,
+                                       fw_impl: FrameworkImplementation,
+                                       cuts: Optional[List[Cut]] = None) -> np.ndarray:
+    """
+    Computes a resource utilization vector with the respective output memory max-cut size for activation
+    nodes, according to the given mixed-precision configuration.
+
+    Args:
+        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
+        graph: Graph object.
+        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
+            (not used in this method).
+        fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
+        cuts: a list of graph cuts (optional. if not provided calculated locally).
+            TODO maxcut: refactor - need to remove the cuts so all metric functions signatures are the same.
+
+    Returns: A vector of node's cut memory sizes.
+        Note that the vector is not necessarily of the same length as the given config.
+
+    """
+    if len(mp_cfg) == 0:
+        # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
+        # configurable nodes.
+        return np.array([])
+
+    activation_cut_memory = []
+    mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
+    # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
+    nodes_act_nbits = {}
+    for n in graph.get_sorted_activation_configurable_nodes():
+        node_idx = mp_nodes.index(n.name)
+        node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
+        node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
+        nodes_act_nbits[n.name] = node_nbits
+
+    if cuts is None:
+        cuts = calc_graph_cuts(graph)
+
+    for i, cut in enumerate(cuts):
+        mem_elements = [m.node_name for m in cut.mem_elements.elements]
+        mem = 0
+        for op_name in mem_elements:
+            n = graph.find_node_by_name(op_name)[0]
+            if n.is_activation_quantization_enabled():
+                base_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
+                mem += _compute_node_activation_memory(n, nodes_act_nbits.get(op_name, base_nbits))
+
+        activation_cut_memory.append(mem)
+
+    return np.array(activation_cut_memory)
+
+
+# TODO maxcut: add test for this function and remove no cover
 def activation_output_size_utilization(mp_cfg: List[int],
                                        graph: Graph,
                                        fw_info: FrameworkInfo,
-                                       fw_impl: FrameworkImplementation) -> np.ndarray:
+                                       fw_impl: FrameworkImplementation) -> np.ndarray:  # pragma: no cover
     """
     Computes a resource utilization vector with the respective output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
@@ -424,6 +508,8 @@ class MpRuMetric(Enum):
 
     WEIGHTS_SIZE - applies the weights_size_utilization function
 
+    ACTIVATION_MAXCUT_SIZE - applies the activation_maxcut_size_utilization function.
+
     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
 
     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
@@ -433,6 +519,7 @@ class MpRuMetric(Enum):
     """
 
     WEIGHTS_SIZE = partial(weights_size_utilization)
+    ACTIVATION_MAXCUT_SIZE = partial(activation_maxcut_size_utilization)
     ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
     TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
     BOPS_COUNT = partial(bops_utilization)
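A side note on the `MpRuMetric` enum extended above: the members wrap their functions in `functools.partial` because a bare function assigned in an `Enum` body is treated as a method rather than a member. A minimal sketch of the pattern:

```python
from enum import Enum
from functools import partial

def double(x):
    return x * 2

class Metric(Enum):
    DOUBLE = partial(double)  # partial objects become enum members; plain functions would not

    def __call__(self, *args, **kwargs):
        return self.value(*args, **kwargs)

assert Metric.DOUBLE(3) == 6
```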
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
CHANGED
@@ -27,7 +27,7 @@ SOLVER_TIME_LIMIT = 60
 
 
 def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
-                                  target_resource_utilization: ResourceUtilization = None) ->
+                                  target_resource_utilization: ResourceUtilization = None) -> np.ndarray:
     """
     Searching and returning a mixed-precision configuration using an ILP optimization solution.
     It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
@@ -44,7 +44,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
         consumption).
 
     Returns:
-        The mixed-precision configuration (
+        The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node).
 
     """
 
model_compression_toolkit/core/keras/data_util.py
CHANGED
@@ -58,6 +58,7 @@ def flat_gen_fn(data_gen_fn: Callable[[], Generator]):
 
     return gen
 
+
 class TFDatasetFromGenerator:
     """
     TensorFlow dataset from a data generator function, batched to a specified size.
@@ -70,7 +71,7 @@ class TFDatasetFromGenerator:
         """
         inputs = next(data_gen_fn())
         if not isinstance(inputs, list):
-            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')  # pragma: no cover
         self.orig_batch_size = inputs[0].shape[0]
         self._size = None
 
@@ -78,7 +79,6 @@ class TFDatasetFromGenerator:
         output_signature = get_tensor_spec(inputs, ignore_batch_dim=True)
         self.dataset = tf.data.Dataset.from_generator(flat_gen_fn(data_gen_fn), output_signature=output_signature)
 
-
     def __iter__(self):
         return iter(self.dataset)
 
@@ -89,7 +89,6 @@ class TFDatasetFromGenerator:
         return self._size
 
 
-
 class FixedTFDataset:
     """
     Fixed dataset containing samples from a generator, stored in memory.
@@ -103,7 +102,7 @@ class FixedTFDataset:
         """
         inputs = next(data_gen_fn())
         if not isinstance(inputs, list):
-            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')  # pragma: no cover
         self.orig_batch_size = inputs[0].shape[0]
 
         samples = []
@@ -131,7 +130,7 @@ class FixedSampleInfoDataset:
 
     def __init__(self, samples: Sequence, sample_info: Sequence):
         if not all(len(info) == len(samples) for info in sample_info):
-            raise ValueError('Sample and additional info lengths must match')
+            raise ValueError('Sample and additional info lengths must match')  # pragma: no cover
         self.samples = samples
         self.sample_info = sample_info
 
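The classes above wrap `tf.data.Dataset.from_generator` with an explicit `output_signature` and flatten the generator's batch dimension. A compact, self-contained sketch of that pattern (the generator, shapes, and flattening are illustrative only, mirroring what `flat_gen_fn` appears to do):

```python
import numpy as np
import tensorflow as tf

def data_gen():
    for _ in range(4):
        yield [np.random.rand(8, 32, 32, 3).astype(np.float32)]  # list of batched tensors

def flat_gen():
    for batch in data_gen():
        yield from zip(*batch)  # split each batch into single samples

spec = (tf.TensorSpec(shape=(32, 32, 3), dtype=tf.float32),)
dataset = tf.data.Dataset.from_generator(flat_gen, output_signature=spec)
for sample in dataset.take(2):
    print(sample[0].shape)  # (32, 32, 3)
```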
model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py
CHANGED
@@ -20,7 +20,7 @@ from packaging import version
 if version.parse(tf.__version__) >= version.parse("2.13"):
     from keras.src.layers.core import TFOpLambda
     from keras.src.layers import Conv2D, DepthwiseConv2D
-else:
+else:  # pragma: no cover
     from keras.layers.core import TFOpLambda
     from keras.layers import Conv2D, DepthwiseConv2D
 from model_compression_toolkit.logger import Logger
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py
CHANGED
@@ -68,8 +68,8 @@ class ScaledDotProductDecomposition(BaseSubstitution):
         output_shape[-2], output_shape[-1] = input_shape[-1], input_shape[-2]
         transpose_node = FunctionalNode(name=f"{attention_node_name}_{key_node.name}_transpose",
                                         framework_attr={},
-                                        input_shape=input_shape,
-                                        output_shape=output_shape,
+                                        input_shape=[input_shape],
+                                        output_shape=[output_shape],
                                         weights={},
                                         layer_class=torch.transpose,
                                         op_call_args=[-1, -2],  # axes to transpose
@@ -99,7 +99,7 @@ class ScaledDotProductDecomposition(BaseSubstitution):
     def _get_matmul_node(self, attention_node_name: str, q_node: BaseNode, transposed_k_node: BaseNode) -> BaseNode:
         matmul1_output_shape = copy(q_node.output_shape[0])
         matmul1_output_shape[-2] = q_node.output_shape[0][-2]
-        matmul1_output_shape[-1] = transposed_k_node.output_shape[-1]
+        matmul1_output_shape[-1] = transposed_k_node.output_shape[0][-1]
         matmul_name = f'{attention_node_name}_matmul1'
         return FunctionalNode(name=matmul_name,
                               framework_attr={},
model_compression_toolkit/core/pytorch/pytorch_implementation.py
CHANGED
@@ -20,7 +20,7 @@ from typing import List, Any, Tuple, Callable, Type, Dict, Generator
 import numpy as np
 import torch
 from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
-from torch import sigmoid, softmax, add, cat, argmax
+from torch import sigmoid, softmax, add, cat, argmax, concat, concatenate
 from torch.nn import Conv2d, ConvTranspose2d, Linear
 from torch.nn import Module, Sigmoid, Softmax
 
@@ -428,7 +428,8 @@ class PytorchImplementation(FrameworkImplementation):
         """
 
         return any(node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
-                                                           softmax, operator.add, add, cat,
+                                                           softmax, operator.add, add, cat, concat, concatenate,
+                                                           operator.concat])
 
     def get_mp_node_distance_fn(self, n: BaseNode,
                                 compute_distance_fn: Callable = None,
model_compression_toolkit/core/pytorch/reader/graph_builders.py
CHANGED
@@ -110,7 +110,7 @@ def _extract_torch_layer_data(node_module: torch.nn.Module) -> Tuple[Any, Dict[s
     """
     node_type = type(node_module)
     if not isinstance(node_module, torch.nn.Module):
-        Logger.error(f"Expected an instance of torch.nn.Module for node {node_module.name}, but got {node_type}")
+        Logger.error(f"Expected an instance of torch.nn.Module for node {node_module.name}, but got {node_type}")  # pragma: no cover
     # Extract the instance framework_attr (i.e. the arguments the class instance was initialized with). "fullargspec"
     # is a list of the layer's attribute names, that will be used as keys of the framework_attr dictionary. We the
     # values from the layer instance.
@@ -147,12 +147,14 @@ def _extract_input_and_output_shapes(_node: Node) -> Tuple[List, List]:
 
     if _node.meta[TYPE] == torch.Tensor:
         output_shape = [list(_node.meta[TENSOR_META].shape)]
+    elif _node.meta[TYPE] == torch.Size:
+        output_shape = [[len(input_shape[0])]] if len(input_shape) > 0 else [[]]
     elif _node.meta[TYPE] in (list, tuple):
         output_shape = [list(m.shape) for m in _node.meta.get(TENSOR_META, [])]
-    elif _node.meta[TYPE]
+    elif _node.meta[TYPE] in [int, bool]:
         output_shape = [[1]]
     else:
-        output_shape = []
+        output_shape = [[]]
 
     return input_shape, output_shape
 
@@ -219,16 +221,16 @@ def nodes_builder(model: GraphModule,
         elif hasattr(torch.Tensor, node.target):
             node_type = getattr(torch.Tensor, node.target)
         else:
-            Logger.critical(f"The call method '{node.target}' in {node} is not supported.")
+            Logger.critical(f"The call method '{node.target}' in {node} is not supported.")  # pragma: no cover
 
     elif node.op == GET_ATTR:
         # Node holding a constant -> add to consts_dict so can add them later to weights of next node.
         if node.target in consts_dict:
-            Logger.critical('A constant weight appears to have been recorded multiple times.')
+            Logger.critical('A constant weight appears to have been recorded multiple times.')  # pragma: no cover
         consts_dict[node] = model_parameters_and_buffers[node.target]
         continue
     else:
-        Logger.critical(f'Encountered an unsupported node type in node: {node.name}.')
+        Logger.critical(f'Encountered an unsupported node type in node: {node.name}.')  # pragma: no cover
 
     # Add constants to weights dictionary.
     if node.op != PLACEHOLDER:
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/LICENSE.md
RENAMED
File without changes
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/top_level.txt
RENAMED
File without changes