mct-nightly 2.2.0.20241224.532__py3-none-any.whl → 2.2.0.20241230.534__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/RECORD +21 -21
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/fusion/graph_fuser.py +8 -7
- model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +6 -6
- model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py +14 -8
- model_compression_toolkit/core/common/graph/memory_graph/memory_element.py +6 -1
- model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py +14 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +47 -13
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +55 -8
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +89 -2
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +2 -2
- model_compression_toolkit/core/keras/data_util.py +4 -5
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py +3 -3
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +3 -2
- model_compression_toolkit/core/pytorch/reader/graph_builders.py +8 -6
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=
+model_compression_toolkit/__init__.py,sha256=iPjPjwwH50JpoFzcLJDS6XEjBVsJQmsuUenuZxUXTPg,1573
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -29,7 +29,7 @@ model_compression_toolkit/core/common/collectors/mean_collector.py,sha256=mjr3U_
 model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py,sha256=5oKsJEKdVmj4C7fKdHhmrFN5k4G2BaFETpmf_xKNs7s,5207
 model_compression_toolkit/core/common/collectors/statistics_collector.py,sha256=vcf7Pk1v09SJC4fbAWf_8AgTktE6tPizJbQpSmocP2U,7930
 model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=
+model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=b41_4rL_Adiza4vpWlmmqgvkpUmWVdfdx0nEIB0p2n8,6195
 model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
 model_compression_toolkit/core/common/graph/base_graph.py,sha256=6jlwj4WDT3XJKf1QQ4Aougswhl-Xx51QzV58CePbjVg,37818
@@ -41,11 +41,11 @@ model_compression_toolkit/core/common/graph/graph_searches.py,sha256=2oKuW6L8hP-
 model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py,sha256=3el-A7j1oyoo1_9zq3faQp7IeRsFXFCvnrb3zZFXpU0,9803
 model_compression_toolkit/core/common/graph/memory_graph/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py,sha256=X6FK3C3y8ixFRPjC_wm3ClloCX8_06SOdA1TRi7o_LA,3800
-model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256
+model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256=7KbAQ21mToemAjmpsUKknRzoljOaQ62VTxUSsLTSskU,2877
 model_compression_toolkit/core/common/graph/memory_graph/cut.py,sha256=aPdXJPP5a5Rnu5Z5XqTZZkuGtdgHVu0RmX_NOfNM6Tc,2470
-model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=
-model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=
-model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=
+model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=gOVnHt0JaZvyuA0T1oTFSR59cwFO46E5OOC53h19nvg,17839
+model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=ISD2BvJWj5mB91jrFjG8VQb0oOoLBoita_thCZWzCPI,4238
+model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=3OC8kMXuzBv-R7wWmKY-i1AQNAr5x3LBZ4aj7hHF-cQ,7791
 model_compression_toolkit/core/common/hessian/__init__.py,sha256=E7LK3K_1AwMCQokanNc1JODMwUKNOKmwXQiGQ7GO10I,1033
 model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=YynbVHdHH2gPlk1QHXH6GygIkXRZ9qxR14cpgKrHPT0,13238
 model_compression_toolkit/core/common/hessian/hessian_info_utils.py,sha256=1axmN0tjJSo_7hUr2d2KMv4y1pBi19cqWSQpi4BbdsA,1458
@@ -66,18 +66,18 @@ model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha2
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=15PbLAfuIyQInFczPka_MuyO4AJzAaOm9bOi3bzllxc,4531
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=B7xLl8P5eCz0_fBxocDlNiv6k-3MdfMUk2GjYKl2p5k,7522
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=UGoIp5Fb8WwZpBSrEr_KO7SRdhSg5XuZq2ZzjL1ILuc,39296
 model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=adjuvrJcan7Ua3nYlJX7T6qGkCRHGqWMaM5-099a9Us,27220
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=cjmHFU4peJ6qYP8lsIkYYSLvRddDbiSQ6mPZnZy0p6U,7905
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=MtPkZfPIJWI191Hbjp6JluUyLnqiJRi3zNf-CqVNuag,5053
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=zVdOL80tbVAGUBT-JzeyBNGXASmutJTCTW0G6AQz7WY,17319
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py,sha256=PmuVXCKgwRNvG7pLGdA24Ren1lFH5hW51_FrOmUVHwU,4199
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=Z-cFOGUysk33OQgxZrmqn6dvMorR4m3xTgxjuLkplbs,1850
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=wctoYZE2vskmlPKvGMZ3UVBhyKQVP362crh_k0D_Bx0,25538
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=c_msFU7zoBpkcT9_-P-OLyPTDd9hZEdFjFUQ0Y9pLaY,16574
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -155,7 +155,7 @@ model_compression_toolkit/core/common/visualization/tensorboard_writer.py,sha256
 model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiOcnLAq1v0MMBCJqMJzzFk,3225
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
-model_compression_toolkit/core/keras/data_util.py,sha256=
+model_compression_toolkit/core/keras/data_util.py,sha256=HQj3-GP5oT5JHpYt80mtKhZjTCvKYs6c3Ll0txEgKHQ,6892
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=PYcER89eEXjKtR0T7-2Y4f7cckqoD5OQbpHePoRkMec,5030
 model_compression_toolkit/core/keras/keras_implementation.py,sha256=HwbIR7x4t-TBNbWHVvVNFk8z-KFt6zM0LWAUXQuNZrk,31753
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
@@ -176,7 +176,7 @@ model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_reconstruction.py,sha256=GR1a3mCZpNUu4WxixJXF_aSm57phAdxaRoHecNx3hxw,3168
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_refusing.py,sha256=5df_xGfXkqNub4xVRnCWQvSohWqdv12axjJ6edVU2H0,2478
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/concat_threshold_update.py,sha256=Hl4LEQ_bw_Vpmf3ZqHujYUqVdvTNsPlEMvr9dZhwg2U,2806
-model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py,sha256=
+model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py,sha256=YHEh3rtTD61doT_oz8Tw7fg5AKKHxXvbpW_GmVbSUVw,11651
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py,sha256=R3U7cjc2E0zheMem16GHygp5jZFGSaomkNOTxTjcAgw,5794
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py,sha256=V6hp67CkS_A3WqdsjLjs0ETtdZAOo4P9mhy4aT7W5FE,5940
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py,sha256=AvquvVVVT8-ioeVn-gjqysK4L41L3I7TlNOEDfWjViY,8185
@@ -223,7 +223,7 @@ model_compression_toolkit/core/pytorch/constants.py,sha256=YwD_joIF0vK8UG2vW1NVv
 model_compression_toolkit/core/pytorch/data_util.py,sha256=YYbT135HhlTt0q6XdD2JX7AS_L92f_uV2rWq2hsJOCA,6325
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-Vls1P_8Ckm_18nnOsmQkZ71SmzHwtQLbQ383Z4Rb-U,4365
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=Mfdq15JLKWAAkpnOt_urcPOVXhqONTvzORyLOG-_Klo,29659
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=xpKj99OZKT9NT0vKIl_cOe8d89d2gef1gKoNT6PFElE,4989
 model_compression_toolkit/core/pytorch/utils.py,sha256=7VbgcLwtQvdEEc_AJgSOQ3U3KRKCICFPaBirN1fIQxg,3940
@@ -254,7 +254,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py,sha256=hAZXzrEinHa-dJHLj39Hy_9Q-13QyO95rtYVSLrhvT8,4915
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py,sha256=DcJEIkGvBdIMOelNIwaJUZ5UsAHiGnDJPR20I464vWo,2929
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py,sha256=XFtU9yuBmoZlX0f0mS6otMPWMk-RcWs94XdvvTNhW8Y,3303
-model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=
+model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py,sha256=SBtIuxb1Q2oUMJKSrAyN2wuaY4k1tsKt7qql0dP_PE0,12473
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py,sha256=3WCLvPyx7tVkM0rwYhYq-gntCzW9R_DcImR1ucKlPac,10772
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/softmax_shift.py,sha256=05lV4pIL3hJkZl4JQPV4wk_EFD0eYLG5b8cdzvZk4P8,1588
 model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/transform_function_call_method.py,sha256=EC9Dvp-_UlpDWnipnf8ds65wh_Y-T8pXAFIwRScWpiY,2044
@@ -273,7 +273,7 @@ model_compression_toolkit/core/pytorch/quantizer/__init__.py,sha256=Rf1RcYmelmdZ
 model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py,sha256=D8_CEuFqKAhbUgKaRw7Jlxo0zlqgPTMu6CIIIM4LfS0,7045
 model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py,sha256=uyeBtNokyDUikk-YkDP_mN_2DX0J5oPm3kSfdSUT2Ck,4420
 model_compression_toolkit/core/pytorch/reader/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=
+model_compression_toolkit/core/pytorch/reader/graph_builders.py,sha256=RBNhPuz02kstVVIDibHUES_Skn9feg3gOGbQylM8h-A,19547
 model_compression_toolkit/core/pytorch/reader/node_holders.py,sha256=7XNc7-l1MZPJGcOESvtAwfIMxrU6kvt3YjF5B7qOqK4,1048
 model_compression_toolkit/core/pytorch/reader/reader.py,sha256=GEJE0QX8XJFWbYCkbRBtzttZtmmuoACLx8gw9KyAQCE,6015
 model_compression_toolkit/core/pytorch/statistics_correction/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
@@ -560,8 +560,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
+mct_nightly-2.2.0.20241230.534.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20241230.534.dist-info/METADATA,sha256=yyeiq5zHdWmEdPcLFzs1V6GQR86hrjTPm05Nbj-rGCs,26453
+mct_nightly-2.2.0.20241230.534.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+mct_nightly-2.2.0.20241230.534.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.2.0.20241230.534.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.2.0.
+__version__ = "2.2.0.20241230.000534"
model_compression_toolkit/core/common/fusion/graph_fuser.py
CHANGED
@@ -36,10 +36,10 @@ class GraphFuser:
         The fusion process involves:
             1. Creating new fused nodes to represent these groups.
             2. Updating the graph structure to replace the original nodes with fused nodes.
-            3. Maintaining mapping
+            3. Maintaining mapping of original node names to their fused node names.
 
         Args:
-            graph: Graph to
+            graph: Graph to fuse its nodes.
 
         Returns:
             Mapping of original node names to their fused node names
@@ -54,7 +54,8 @@ class GraphFuser:
             fused_nodes_mapping[node.name] = new_fused_node.name
         return fused_nodes_mapping
 
-
+    @staticmethod
+    def _create_fused_node(nodes: List[BaseNode]) -> BaseNode:
         """
         Create a new node that represents the fusion of the given nodes.
 
@@ -79,10 +80,10 @@ class GraphFuser:
 
         return fused_node
 
-
-
-
-
+    @staticmethod
+    def _replace_nodes_with_fused_node(graph: Graph,
+                                       nodes_to_fuse: List[BaseNode],
+                                       fused_node: BaseNode):
         """
         Replace the specified nodes in the graph with a new fused node.
 
model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
CHANGED
@@ -51,13 +51,13 @@ def compute_graph_max_cut(memory_graph: MemoryGraph,
         estimate = (u_bound + l_bound) / 2
         schedule, max_cut_size, cuts = max_cut_astar.solve(estimate_factor=estimate, iter_limit=astar_n_iter)
         if schedule is None:
-
+            l_bound = estimate
+        else:
+            u_bound = min(estimate, max_cut_size)
+            last_result = (schedule, max_cut_size, cuts)
 
-
-
-
-        if l_bound * (1 + eps) >= next_u_bound:
-            return last_result
+        if l_bound * (1 + eps) >= u_bound:
+            return last_result
 
         it += 1
 
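For context, the rewritten loop above is a binary search over the cut-size estimate: an infeasible estimate raises the lower bound, a feasible one tightens the upper bound to the achieved max-cut size, and the search stops once the bounds are within the `eps` tolerance. A minimal standalone sketch of that control flow (the `solve` callable, bounds, and tolerance are stand-ins, not the MCT API):

```python
from typing import Callable, Optional, Tuple

SolveResult = Tuple[list, float, list]  # (schedule, max_cut_size, cuts)

def binary_search_max_cut(solve: Callable[[float], Optional[SolveResult]],
                          l_bound: float,
                          u_bound: float,
                          eps: float = 1e-2) -> Optional[SolveResult]:
    """Shrink [l_bound, u_bound] around the smallest feasible cut-size estimate."""
    last_result = None
    while l_bound * (1 + eps) < u_bound:
        estimate = (u_bound + l_bound) / 2
        result = solve(estimate)
        if result is None:
            l_bound = estimate  # infeasible -> raise the lower bound
        else:
            _, max_cut_size, _ = result
            u_bound = min(estimate, max_cut_size)  # feasible -> tighten the upper bound
            last_result = result
    return last_result
```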
model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py
CHANGED
@@ -154,6 +154,9 @@ class MaxCutAstar:
             cut_route = routes[next_cut]
 
             if next_cut == self.target_cut:
+                # TODO maxcut: Why do we filter the cuts (cut_route) but not the max cut size (cut_sost).
+                # This is a mismatch between max_cut and max(cuts).
+                # Also, unfiltered cut_route seems perfect, including input and output tensor sizes of current op.
                 return self._remove_dummys_from_path(cut_route[0].op_order), cut_cost,\
                        list(set([self._remove_dummys_from_cut(self.clean_memory_for_next_step(c)) for c in cut_route]))
 
@@ -178,7 +181,8 @@ class MaxCutAstar:
                 cost = self.accumulate(cut_cost, c.memory_size())
                 if c not in open_list:
                     self._update_expanded_node(c, cost, cut_route, open_list, costs, routes)
-
+                # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+                elif self.ordering(cost, costs[c]):  # pragma: no cover
                     # If we already saw this cut during the search with a larger cost, then we want to update the order
                     # of the schedule in the cut
                     # Remove call - removes the cut with the same memory elements but different ordering from open
@@ -187,7 +191,8 @@ class MaxCutAstar:
                     self._update_expanded_node(c, cost, cut_route, open_list, costs, routes)
 
         # Halt or No Solution
-
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        return None, 0, None  # pragma: no cover
 
     @staticmethod
     def _update_expanded_node(cut: Cut, cost: float, route: List[Cut], open_list: List[Cut],
@@ -223,8 +228,7 @@ class MaxCutAstar:
 
         """
         ordered_cuts_list = sorted(open_list,
-                                   key=lambda c: (self.accumulate(costs[c], self.estimate(c, estimate_factor)), len(routes[c]))
-                                   reverse=False)
+                                   key=lambda c: (self.accumulate(costs[c], self.estimate(c, estimate_factor)), -len(routes[c])))
 
         assert len(ordered_cuts_list) > 0
         return ordered_cuts_list[0]
@@ -349,7 +353,8 @@ class MaxCutAstar:
         Returns: True if the first cost is smaller than the second one, else otherwise.
 
         """
-
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        return cost_1 < cost_2  # pragma: no cover
 
     def estimate(self, cut: Cut, estimate_factor: float) -> float:
         """
@@ -377,9 +382,10 @@ class MaxCutAstar:
         Returns: An initial estimate value.
 
         """
-
-
-
+        # TODO maxcut: this isn't covered in the coverage test. check if needed and remove no cover
+        l_bound = memory_graph.memory_lbound_single_op  # pragma: no cover
+        u_bound = 2 * sum([t.total_size for t in memory_graph.b_nodes]) - l_bound  # pragma: no cover
+        return (u_bound + l_bound) / 2  # pragma: no cover
 
     @staticmethod
    def _remove_dummys_from_path(path: List[BaseNode]) -> List[BaseNode]:
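The sorted-key change above folds the removed `reverse=False` argument into the key itself: ordering stays ascending, but the route-length tie-breaker is negated, so among equal-cost cuts the one with the longest route is now expanded first. A toy illustration of that tie-breaking difference (hypothetical data):

```python
# (name, cost, route_len) triples standing in for cuts, their costs and route lengths.
candidates = [("a", 10.0, 2), ("b", 10.0, 5), ("c", 12.0, 1)]

old_first = min(candidates, key=lambda c: (c[1], c[2]))   # ties prefer the shortest route -> "a"
new_first = min(candidates, key=lambda c: (c[1], -c[2]))  # ties prefer the longest route -> "b"
assert old_first[0] == "a" and new_first[0] == "b"
```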
model_compression_toolkit/core/common/graph/memory_graph/memory_element.py
CHANGED
@@ -30,7 +30,12 @@ class ActivationMemoryTensor:
             init_size_to_zero: Whether to initialize the memory tensor size to 0 or not.
         """
 
-
+        # remove batch size (first element) from output shape. If the shape is a list then remove the first
+        # axis. If shape a vector (e.g. output of size) then set the shape minus 1 to ignore the batch value.
+        if len(shape) == 1:
+            self.shape = [] if shape[0] is None else [shape[0] - 1]
+        else:
+            self.shape = shape[1:]
         # The total size of a tensor is considered to be the number of elements in the tensor
         self.total_size = self._get_tensor_total_size() if not init_size_to_zero else 0
 
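The constructor change above normalizes the incoming output shape before the element count is computed. A standalone rendering of that rule, under the assumption stated in the code comment (the first element always carries the batch value, and `None` means a dynamic batch):

```python
from typing import List, Optional

def strip_batch(shape: List[Optional[int]]) -> list:
    if len(shape) == 1:
        # 1-D "shape" (e.g. the output of a size op): drop the batch value from the count.
        return [] if shape[0] is None else [shape[0] - 1]
    return shape[1:]  # multi-axis shape: drop the leading batch axis

assert strip_batch([None, 32, 32, 3]) == [32, 32, 3]
assert strip_batch([None]) == []
assert strip_batch([8]) == [7]
```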
model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py
CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from typing import List
+from operator import getitem
 
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.graph.edge import EDGE_SOURCE_INDEX
@@ -45,7 +46,8 @@ class MemoryGraph(DirectedBipartiteGraph):
         tensor_to_node = []
 
         for n in nodes:
-            n_outputs =
+            n_outputs = n.output_shape if isinstance(n.output_shape[0], (tuple, list)) else [n.output_shape]
+
             out_edges = model_graph.out_edges(n, sort_by_attr=EDGE_SOURCE_INDEX)
 
             for i, ot in enumerate(n_outputs):
@@ -54,7 +56,16 @@ class MemoryGraph(DirectedBipartiteGraph):
                 # Add memory tensor as current node's output
                 node_to_tensor.append((n, memory_tensor))
 
-
+                # TODO maxcut: refactor this code. it handles split->getitem generated by fx.
+                ot_edges = []
+                for oe in out_edges:
+                    if oe.sink_node.type is getitem and len(oe.sink_node.op_call_args) == 1 and isinstance(oe.sink_node.op_call_args[0], int):
+                        source_index = oe.sink_node.op_call_args[0]
+                    else:
+                        source_index = oe.source_index
+                    if source_index == i:
+                        ot_edges.append(oe)
+
                 for oe in ot_edges:
                     # Add current memory tensor as input to current node's successors
                     tensor_to_node.append((memory_tensor, oe.sink_node))
@@ -71,6 +82,7 @@ class MemoryGraph(DirectedBipartiteGraph):
         inputs_tensors_memory = [sum([t.total_size for t in self.operation_node_children(n)])
                                  for n in nodes if n in model_graph.get_inputs()]
 
+        # TODO maxcut: why both inputs and outputs of each nodes, while the A* solves for node outputs only???
         nodes_total_memory = [sum([t.total_size for t in self.operation_node_children(n)] +
                                   [t.total_size for t in self.operation_node_parents(n)])
                               for n in nodes if n not in model_graph.get_inputs()]
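The new edge-routing block above exists because torch.fx lowers `split` followed by indexing into `getitem` nodes whose single integer argument names the producer's output slot, while ordinary edges carry that slot in `source_index`. A simplified, self-contained sketch of the selection logic (the dataclasses are stand-ins for MCT's node and edge objects):

```python
from dataclasses import dataclass
from operator import getitem
from typing import Any, List

@dataclass
class FakeNode:          # stand-in for a graph node
    type: Any
    op_call_args: list

@dataclass
class FakeEdge:          # stand-in for a graph edge
    sink_node: FakeNode
    source_index: int

def edges_for_output(out_edges: List[FakeEdge], i: int) -> List[FakeEdge]:
    selected = []
    for oe in out_edges:
        if oe.sink_node.type is getitem and len(oe.sink_node.op_call_args) == 1 \
                and isinstance(oe.sink_node.op_call_args[0], int):
            # getitem sink: its single int argument is the producer's output slot
            source_index = oe.sink_node.op_call_args[0]
        else:
            source_index = oe.source_index
        if source_index == i:
            selected.append(oe)
    return selected

# Two getitem consumers of a 2-output split node:
e0 = FakeEdge(FakeNode(getitem, [0]), source_index=0)
e1 = FakeEdge(FakeNode(getitem, [1]), source_index=0)  # fx leaves source_index at 0
assert edges_for_output([e0, e1], 1) == [e1]
```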
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
CHANGED
@@ -24,8 +24,10 @@ from model_compression_toolkit.core.common.graph.base_graph import Graph
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import RuFunctions
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric, calc_graph_cuts
+from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import Cut
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 
@@ -40,7 +42,7 @@ class MixedPrecisionSearchManager:
                  fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
-                 ru_functions: Dict[RUTarget,
+                 ru_functions: Dict[RUTarget, RuFunctions],
                  target_resource_utilization: ResourceUtilization,
                  original_graph: Graph = None):
         """
@@ -65,8 +67,11 @@ class MixedPrecisionSearchManager:
         self.sensitivity_evaluator = sensitivity_evaluator
         self.layer_to_bitwidth_mapping = self.get_search_space()
         self.compute_metric_fn = self.get_sensitivity_metric()
+        self._cuts = None
 
-
+        ru_types = [ru_target for ru_target, ru_value in
+                    target_resource_utilization.get_resource_utilization_dict().items() if ru_value < np.inf]
+        self.compute_ru_functions = {ru_target: ru_fn for ru_target, ru_fn in ru_functions.items() if ru_target in ru_types}
         self.target_resource_utilization = target_resource_utilization
         self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
@@ -76,6 +81,17 @@ class MixedPrecisionSearchManager:
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
                                                                        original_graph=self.original_graph)
 
+    @property
+    def cuts(self) -> List[Cut]:
+        """
+        Calculates graph cuts. Written as property, so it will only be calculated once and
+        only if cuts are needed.
+
+        """
+        if self._cuts is None:
+            self._cuts = calc_graph_cuts(self.original_graph)
+        return self._cuts
+
     def get_search_space(self) -> Dict[int, List[int]]:
         """
         The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces
@@ -106,6 +122,21 @@ class MixedPrecisionSearchManager:
 
         return self.sensitivity_evaluator.compute_metric
 
+    def _calc_ru_fn(self, ru_target, ru_fn, mp_cfg) -> np.ndarray:
+        """
+        Computes a resource utilization for a certain mixed precision configuration.
+        The method computes a resource utilization vector for specific target resource utilization.
+
+        Returns: resource utilization value.
+
+        """
+        # ru_fn is a pair of resource utilization computation method and
+        # resource utilization aggregation method (in this method we only need the first one)
+        if ru_target is RUTarget.ACTIVATION:
+            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl, self.cuts)
+        else:
+            return ru_fn.metric_fn(mp_cfg, self.graph, self.fw_info, self.fw_impl)
+
     def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
         """
         Computes a resource utilization vector with the values matching to the minimal mp configuration
@@ -118,10 +149,10 @@ class MixedPrecisionSearchManager:
 
         """
         min_ru = {}
-        for ru_target,
-            # ru_fns is a pair of resource utilization computation method and
+        for ru_target, ru_fn in self.compute_ru_functions.items():
+            # ru_fns is a pair of resource utilization computation method and
             # resource utilization aggregation method (in this method we only need the first one)
-            min_ru[ru_target] =
+            min_ru[ru_target] = self._calc_ru_fn(ru_target, ru_fn, self.min_ru_config)
 
         return min_ru
 
@@ -212,7 +243,7 @@ class MixedPrecisionSearchManager:
 
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        return self.
+        return self._calc_ru_fn(target, self.compute_ru_functions[target], cfg)
 
     @staticmethod
     def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -241,13 +272,15 @@ class MixedPrecisionSearchManager:
         """
 
         non_conf_ru_dict = {}
-        for target,
+        for target, ru_fns in self.compute_ru_functions.items():
            # Call for the ru method of the given target - empty quantization configuration list is passed since we
            # compute for non-configurable nodes
            if target == RUTarget.BOPS:
                ru_vector = None
+            elif target == RUTarget.ACTIVATION:
+                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl, self.cuts)
            else:
-                ru_vector =
+                ru_vector = ru_fns.metric_fn([], self.graph, self.fw_info, self.fw_impl)
 
            non_conf_ru_dict[target] = ru_vector
 
@@ -266,14 +299,15 @@ class MixedPrecisionSearchManager:
         """
 
         ru_dict = {}
-
        for ru_target, ru_fns in self.compute_ru_functions.items():
            # Passing False to ru methods and aggregations to indicates that the computations
            # are not for constraints setting
            if ru_target == RUTarget.BOPS:
-                configurable_nodes_ru_vector = ru_fns
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl, False)
+            elif ru_target == RUTarget.ACTIVATION:
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.graph, self.fw_info, self.fw_impl, self.cuts)
            else:
-                configurable_nodes_ru_vector = ru_fns
+                configurable_nodes_ru_vector = ru_fns.metric_fn(config, self.original_graph, self.fw_info, self.fw_impl)
            non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
            if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
                ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
@@ -647,7 +681,7 @@ class ConfigReconstructionHelper:
                 # It's ok, need to find the node's configuration
                 self.retrieve_weights_activation_config(activation_node, weights_node, virtual_node, virtual_cfg_idx, virtual_mp_cfg)
             else:
-                Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{
+                Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{weights_node.name}' to have multiple outputs when not composed with an activation node.")  # pragma: no cover
 
     def update_config_at_original_idx(self, n: BaseNode, origin_cfg_idx: int):
         """
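The `cuts` property added above is a plain lazy-caching accessor: the expensive max-cut computation runs at most once, and only when some resource-utilization metric actually asks for the cuts. A minimal sketch of the pattern (`compute_cuts` stands in for `calc_graph_cuts`); on Python 3.8+, `functools.cached_property` provides equivalent behavior:

```python
class LazyCuts:
    def __init__(self, graph, compute_cuts):
        self._graph = graph
        self._compute_cuts = compute_cuts
        self._cuts = None  # not computed yet

    @property
    def cuts(self):
        # Computed at most once, and only if actually requested.
        if self._cuts is None:
            self._cuts = self._compute_cuts(self._graph)
        return self._cuts
```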
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
CHANGED
@@ -13,10 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 import copy
+from collections import defaultdict
 
 import numpy as np
 from typing import Callable, Any, Dict, Tuple
 
+from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import FLOAT_BITWIDTH, BITS_TO_BYTES
 from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
 from model_compression_toolkit.core.common import Graph
@@ -25,6 +27,7 @@ from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import QuantizationConfigOptions
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import calc_graph_cuts
 
 
 def compute_resource_utilization_data(in_model: Any,
@@ -76,7 +79,7 @@ def compute_resource_utilization_data(in_model: Any,
     total_weights_params = 0 if len(weights_params) == 0 else sum(weights_params)
 
     # Compute max activation tensor
-    activation_output_sizes_bytes, activation_output_sizes =
+    activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_maxcut_sizes(graph=transformed_graph)
     max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
 
     # Compute total memory utilization - parameters sum + max activation tensor
@@ -132,7 +135,52 @@ def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> Tuple[
 
     return np.array(weights_memory_bytes), np.array(weights_params)
 
-
+
+def compute_activation_output_maxcut_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Computes an array of the respective output tensor maxcut size and an array of the output tensor
+    cut size in bytes for each cut.
+
+    Args:
+        graph: A finalized Graph object, representing the model structure.
+
+    Returns:
+        A tuple containing two arrays:
+            - The first is an array of the size of each activation max-cut size in bytes, calculated
+              using the maximal bit-width for quantization.
+            - The second array an array of the size of each activation max-cut activation size in number of parameters.
+
+    """
+    cuts = calc_graph_cuts(graph)
+
+    # map nodes to cuts.
+    node_to_cat_mapping = defaultdict(list)
+    for i, cut in enumerate(cuts):
+        mem_element_names = [m.node_name for m in cut.mem_elements.elements]
+        for m_name in mem_element_names:
+            if len(graph.find_node_by_name(m_name)) > 0:
+                node_to_cat_mapping[m_name].append(i)
+            else:
+                Logger.critical(f"Missing node: {m_name}")  # pragma: no cover
+
+    activation_outputs = np.zeros(len(cuts))
+    activation_outputs_bytes = np.zeros(len(cuts))
+    for n in graph.nodes:
+        # Go over all nodes that have activation quantization enabled.
+        if n.has_activation_quantization_enabled_candidate():
+            # Fetch maximum bits required for activations quantization.
+            max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
+            node_output_size = n.get_total_output_params()
+            for cut_index in node_to_cat_mapping[n.name]:
+                activation_outputs[cut_index] += node_output_size
+                # Calculate activation size in bytes and append to list
+                activation_outputs_bytes[cut_index] += node_output_size * max_activation_bits / BITS_TO_BYTES
+
+    return activation_outputs_bytes, activation_outputs
+
+
+# TODO maxcut: add test for this function and remove no cover
+def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:  # pragma: no cover
     """
     Computes an array of the respective output tensor size and an array of the output tensor size in bytes for
     each node.
@@ -146,9 +194,7 @@ def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarra
             calculated using the maximal bit-width for quantization.
         - The second array represents the size of each node's activation output tensor size.
 
-
     """
-
     activation_outputs = []
     activation_outputs_bytes = []
     for n in graph.nodes:
@@ -238,16 +284,17 @@ def requires_mixed_precision(in_model: Any,
     total_weights_memory_bytes = 0 if len(weights_memory_by_layer_bytes) == 0 else sum(weights_memory_by_layer_bytes)
 
     # Compute max activation tensor in bytes
-
-
+    activation_memory_estimation_bytes, _ = compute_activation_output_maxcut_sizes(transformed_graph)
+    max_activation_memory_estimation_bytes = 0 if len(activation_memory_estimation_bytes) == 0 \
+        else max(activation_memory_estimation_bytes)
 
     # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
     bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
     bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
 
     is_mixed_precision |= target_resource_utilization.weights_memory < total_weights_memory_bytes
-    is_mixed_precision |= target_resource_utilization.activation_memory <
-    is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes +
+    is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_memory_estimation_bytes
+    is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_memory_estimation_bytes
     is_mixed_precision |= target_resource_utilization.bops < bops_count
     return is_mixed_precision
 
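In `compute_activation_output_maxcut_sizes` above, a node's output parameter count is added to every cut in which its tensor is live, and the byte figure scales by the node's maximal candidate bit-width over `BITS_TO_BYTES` (8). A toy rendition of that per-cut accounting with made-up numbers:

```python
import numpy as np

BITS_TO_BYTES = 8
cuts = {0: ["conv1", "relu1"], 1: ["relu1", "conv2"]}          # cut index -> live tensors
node_params = {"conv1": 100.0, "relu1": 100.0, "conv2": 50.0}  # output sizes in parameters
node_max_bits = {"conv1": 8, "relu1": 8, "conv2": 16}          # max candidate bit-widths

sizes = np.zeros(len(cuts))
sizes_bytes = np.zeros(len(cuts))
for cut_idx, names in cuts.items():
    for name in names:
        sizes[cut_idx] += node_params[name]
        sizes_bytes[cut_idx] += node_params[name] * node_max_bits[name] / BITS_TO_BYTES

# Cut 0: (100*8 + 100*8)/8 = 200 bytes; cut 1: (100*8 + 50*16)/8 = 200 bytes.
assert sizes_bytes.tolist() == [200.0, 200.0]
```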
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py
CHANGED
@@ -28,6 +28,6 @@ class RuFunctions(NamedTuple):
 
 
 ru_functions_mapping = {RUTarget.WEIGHTS: RuFunctions(MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
-                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.
+                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_MAXCUT_SIZE, MpRuAggregation.MAX),
                         RUTarget.TOTAL: RuFunctions(MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
                         RUTarget.BOPS: RuFunctions(MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
CHANGED
@@ -14,7 +14,8 @@
 # ==============================================================================
 from enum import Enum
 from functools import partial
-from typing import List
+from typing import List, Optional
+from copy import deepcopy
 
 import numpy as np
 
@@ -25,6 +26,8 @@ from model_compression_toolkit.core.common.framework_implementation import Frame
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
 from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
     VirtualSplitWeightsNode, VirtualSplitActivationNode
+from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
+from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, Cut
 from model_compression_toolkit.logger import Logger
 
 
@@ -87,10 +90,91 @@ def weights_size_utilization(mp_cfg: List[int],
     return np.array(weights_memory)
 
 
+def calc_graph_cuts(graph: Graph) -> List[Cut]:
+    """
+    Calculate graph activation cuts.
+    Args:
+        graph: A graph object to calculate activation cuts on.
+
+    Returns:
+        A list of activation cuts.
+
+    """
+    memory_graph = MemoryGraph(deepcopy(graph))
+    _, _, cuts = compute_graph_max_cut(memory_graph)
+
+    if cuts is None:
+        Logger.critical("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
+    # filter empty cuts and cuts that contain only nodes with activation quantization disabled.
+    filtered_cuts = []
+    for cut in cuts:
+        cut_has_no_act_quant_nodes = any(
+            [graph.find_node_by_name(e.node_name)[0].has_activation_quantization_enabled_candidate()
+             for e in cut.mem_elements.elements])
+        if len(cut.mem_elements.elements) > 0 and cut_has_no_act_quant_nodes:
+            filtered_cuts.append(cut)
+    return filtered_cuts
+
+
+def activation_maxcut_size_utilization(mp_cfg: List[int],
+                                       graph: Graph,
+                                       fw_info: FrameworkInfo,
+                                       fw_impl: FrameworkImplementation,
+                                       cuts: Optional[List[Cut]] = None) -> np.ndarray:
+    """
+    Computes a resource utilization vector with the respective output memory max-cut size for activation
+    nodes, according to the given mixed-precision configuration.
+
+    Args:
+        mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
+        graph: Graph object.
+        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
+            (not used in this method).
+        fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
+        cuts: a list of graph cuts (optional. if not provided calculated locally).
+            TODO maxcut: refactor - need to remove the cuts so all metric functions signatures are the same.
+
+    Returns: A vector of node's cut memory sizes.
+        Note that the vector is not necessarily of the same length as the given config.
+
+    """
+    if len(mp_cfg) == 0:
+        # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
+        # configurable nodes.
+        return np.array([])
+
+    activation_cut_memory = []
+    mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
+    # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
+    nodes_act_nbits = {}
+    for n in graph.get_sorted_activation_configurable_nodes():
+        node_idx = mp_nodes.index(n.name)
+        node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
+        node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
+        nodes_act_nbits[n.name] = node_nbits
+
+    if cuts is None:
+        cuts = calc_graph_cuts(graph)
+
+    for i, cut in enumerate(cuts):
+        mem_elements = [m.node_name for m in cut.mem_elements.elements]
+        mem = 0
+        for op_name in mem_elements:
+            n = graph.find_node_by_name(op_name)[0]
+            if n.is_activation_quantization_enabled():
+                base_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
+                mem += _compute_node_activation_memory(n, nodes_act_nbits.get(op_name, base_nbits))
+
+        activation_cut_memory.append(mem)
+
+    return np.array(activation_cut_memory)
+
+
+# TODO maxcut: add test for this function and remove no cover
 def activation_output_size_utilization(mp_cfg: List[int],
                                        graph: Graph,
                                        fw_info: FrameworkInfo,
-                                       fw_impl: FrameworkImplementation) -> np.ndarray:
+                                       fw_impl: FrameworkImplementation) -> np.ndarray:  # pragma: no cover
     """
     Computes a resource utilization vector with the respective output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
@@ -424,6 +508,8 @@ class MpRuMetric(Enum):
 
     WEIGHTS_SIZE - applies the weights_size_utilization function
 
+    ACTIVATION_MAXCUT_SIZE - applies the activation_maxcut_size_utilization function.
+
     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
 
     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
@@ -433,6 +519,7 @@ class MpRuMetric(Enum):
     """
 
     WEIGHTS_SIZE = partial(weights_size_utilization)
+    ACTIVATION_MAXCUT_SIZE = partial(activation_maxcut_size_utilization)
     ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
     TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
     BOPS_COUNT = partial(bops_utilization)
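A side note on the `MpRuMetric` enum extended above: the members wrap their functions in `functools.partial` because a bare function assigned in an `Enum` body is treated as a method rather than a member. A minimal sketch of the pattern:

```python
from enum import Enum
from functools import partial

def double(x):
    return x * 2

class Metric(Enum):
    DOUBLE = partial(double)  # partial objects become enum members; plain functions would not

    def __call__(self, *args, **kwargs):
        return self.value(*args, **kwargs)

assert Metric.DOUBLE(3) == 6
```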
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
CHANGED
@@ -27,7 +27,7 @@ SOLVER_TIME_LIMIT = 60
 
 
 def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
-                                  target_resource_utilization: ResourceUtilization = None) ->
+                                  target_resource_utilization: ResourceUtilization = None) -> np.ndarray:
     """
     Searching and returning a mixed-precision configuration using an ILP optimization solution.
     It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
@@ -44,7 +44,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
         consumption).
 
     Returns:
-        The mixed-precision configuration (
+        The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node).
 
     """
 
model_compression_toolkit/core/keras/data_util.py
CHANGED
@@ -58,6 +58,7 @@ def flat_gen_fn(data_gen_fn: Callable[[], Generator]):
 
     return gen
 
+
 class TFDatasetFromGenerator:
     """
     TensorFlow dataset from a data generator function, batched to a specified size.
@@ -70,7 +71,7 @@ class TFDatasetFromGenerator:
         """
         inputs = next(data_gen_fn())
         if not isinstance(inputs, list):
-            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')  # pragma: no cover
         self.orig_batch_size = inputs[0].shape[0]
         self._size = None
 
@@ -78,7 +79,6 @@ class TFDatasetFromGenerator:
         output_signature = get_tensor_spec(inputs, ignore_batch_dim=True)
         self.dataset = tf.data.Dataset.from_generator(flat_gen_fn(data_gen_fn), output_signature=output_signature)
 
-
     def __iter__(self):
         return iter(self.dataset)
 
@@ -89,7 +89,6 @@ class TFDatasetFromGenerator:
         return self._size
 
 
-
 class FixedTFDataset:
     """
     Fixed dataset containing samples from a generator, stored in memory.
@@ -103,7 +102,7 @@ class FixedTFDataset:
         """
         inputs = next(data_gen_fn())
         if not isinstance(inputs, list):
-            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')
+            raise TypeError(f'Data generator is expected to yield a list of tensors, got {type(inputs)}')  # pragma: no cover
         self.orig_batch_size = inputs[0].shape[0]
 
         samples = []
@@ -131,7 +130,7 @@ class FixedSampleInfoDataset:
 
     def __init__(self, samples: Sequence, sample_info: Sequence):
         if not all(len(info) == len(samples) for info in sample_info):
-            raise ValueError('Sample and additional info lengths must match')
+            raise ValueError('Sample and additional info lengths must match')  # pragma: no cover
         self.samples = samples
         self.sample_info = sample_info
 
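The classes above wrap `tf.data.Dataset.from_generator` with an explicit `output_signature` and flatten the generator's batch dimension. A compact, self-contained sketch of that pattern (the generator, shapes, and flattening are illustrative only, mirroring what `flat_gen_fn` appears to do):

```python
import numpy as np
import tensorflow as tf

def data_gen():
    for _ in range(4):
        yield [np.random.rand(8, 32, 32, 3).astype(np.float32)]  # list of batched tensors

def flat_gen():
    for batch in data_gen():
        yield from zip(*batch)  # split each batch into single samples

spec = (tf.TensorSpec(shape=(32, 32, 3), dtype=tf.float32),)
dataset = tf.data.Dataset.from_generator(flat_gen, output_signature=spec)
for sample in dataset.take(2):
    print(sample[0].shape)  # (32, 32, 3)
```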
model_compression_toolkit/core/keras/graph_substitutions/substitutions/conv_funcs_to_layer.py
CHANGED
@@ -20,7 +20,7 @@ from packaging import version
 if version.parse(tf.__version__) >= version.parse("2.13"):
     from keras.src.layers.core import TFOpLambda
     from keras.src.layers import Conv2D, DepthwiseConv2D
-else:
+else:  # pragma: no cover
     from keras.layers.core import TFOpLambda
     from keras.layers import Conv2D, DepthwiseConv2D
 from model_compression_toolkit.logger import Logger
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scaled_dot_product_attention.py
CHANGED
@@ -68,8 +68,8 @@ class ScaledDotProductDecomposition(BaseSubstitution):
         output_shape[-2], output_shape[-1] = input_shape[-1], input_shape[-2]
         transpose_node = FunctionalNode(name=f"{attention_node_name}_{key_node.name}_transpose",
                                         framework_attr={},
-                                        input_shape=input_shape,
-                                        output_shape=output_shape,
+                                        input_shape=[input_shape],
+                                        output_shape=[output_shape],
                                         weights={},
                                         layer_class=torch.transpose,
                                         op_call_args=[-1, -2],  # axes to transpose
@@ -99,7 +99,7 @@ class ScaledDotProductDecomposition(BaseSubstitution):
     def _get_matmul_node(self, attention_node_name: str, q_node: BaseNode, transposed_k_node: BaseNode) -> BaseNode:
         matmul1_output_shape = copy(q_node.output_shape[0])
         matmul1_output_shape[-2] = q_node.output_shape[0][-2]
-        matmul1_output_shape[-1] = transposed_k_node.output_shape[-1]
+        matmul1_output_shape[-1] = transposed_k_node.output_shape[0][-1]
         matmul_name = f'{attention_node_name}_matmul1'
         return FunctionalNode(name=matmul_name,
                               framework_attr={},
model_compression_toolkit/core/pytorch/pytorch_implementation.py
CHANGED
@@ -20,7 +20,7 @@ from typing import List, Any, Tuple, Callable, Type, Dict, Generator
 import numpy as np
 import torch
 from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
-from torch import sigmoid, softmax, add, cat, argmax
+from torch import sigmoid, softmax, add, cat, argmax, concat, concatenate
 from torch.nn import Conv2d, ConvTranspose2d, Linear
 from torch.nn import Module, Sigmoid, Softmax
 
@@ -428,7 +428,8 @@ class PytorchImplementation(FrameworkImplementation):
         """
 
         return any(node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
-                                                           softmax, operator.add, add, cat,
+                                                           softmax, operator.add, add, cat, concat, concatenate,
+                                                           operator.concat])
 
     def get_mp_node_distance_fn(self, n: BaseNode,
                                 compute_distance_fn: Callable = None,
model_compression_toolkit/core/pytorch/reader/graph_builders.py
CHANGED
@@ -110,7 +110,7 @@ def _extract_torch_layer_data(node_module: torch.nn.Module) -> Tuple[Any, Dict[s
     """
     node_type = type(node_module)
     if not isinstance(node_module, torch.nn.Module):
-        Logger.error(f"Expected an instance of torch.nn.Module for node {node_module.name}, but got {node_type}")
+        Logger.error(f"Expected an instance of torch.nn.Module for node {node_module.name}, but got {node_type}")  # pragma: no cover
     # Extract the instance framework_attr (i.e. the arguments the class instance was initialized with). "fullargspec"
     # is a list of the layer's attribute names, that will be used as keys of the framework_attr dictionary. We the
     # values from the layer instance.
@@ -147,12 +147,14 @@ def _extract_input_and_output_shapes(_node: Node) -> Tuple[List, List]:
 
     if _node.meta[TYPE] == torch.Tensor:
         output_shape = [list(_node.meta[TENSOR_META].shape)]
+    elif _node.meta[TYPE] == torch.Size:
+        output_shape = [[len(input_shape[0])]] if len(input_shape) > 0 else [[]]
     elif _node.meta[TYPE] in (list, tuple):
         output_shape = [list(m.shape) for m in _node.meta.get(TENSOR_META, [])]
-    elif _node.meta[TYPE]
+    elif _node.meta[TYPE] in [int, bool]:
         output_shape = [[1]]
     else:
-        output_shape = []
+        output_shape = [[]]
 
     return input_shape, output_shape
 
@@ -219,16 +221,16 @@ def nodes_builder(model: GraphModule,
         elif hasattr(torch.Tensor, node.target):
             node_type = getattr(torch.Tensor, node.target)
         else:
-            Logger.critical(f"The call method '{node.target}' in {node} is not supported.")
+            Logger.critical(f"The call method '{node.target}' in {node} is not supported.")  # pragma: no cover
 
     elif node.op == GET_ATTR:
         # Node holding a constant -> add to consts_dict so can add them later to weights of next node.
         if node.target in consts_dict:
-            Logger.critical('A constant weight appears to have been recorded multiple times.')
+            Logger.critical('A constant weight appears to have been recorded multiple times.')  # pragma: no cover
         consts_dict[node] = model_parameters_and_buffers[node.target]
         continue
     else:
-        Logger.critical(f'Encountered an unsupported node type in node: {node.name}.')
+        Logger.critical(f'Encountered an unsupported node type in node: {node.name}.')  # pragma: no cover
 
     # Add constants to weights dictionary.
     if node.op != PLACEHOLDER:
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/LICENSE.md
RENAMED
File without changes
{mct_nightly-2.2.0.20241224.532.dist-info → mct_nightly-2.2.0.20241230.534.dist-info}/top_level.txt
RENAMED
File without changes