PyPI - mct-nightly - Versions diffs - 2.3.0.20250416.541__py3-none-any.whl → 2.3.0.20250418.531__py3-none-any.whl - Mend

mct-nightly 2.3.0.20250416.541__py3-none-any.whl → 2.3.0.20250418.531__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{mct_nightly-2.3.0.20250416.541.dist-info → mct_nightly-2.3.0.20250418.531.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mct-nightly
-Version: 2.3.0.20250416.541
+Version: 2.3.0.20250418.531
 Summary: A Model Compression Toolkit for neural networks
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: Apache Software License

{mct_nightly-2.3.0.20250416.541.dist-info → mct_nightly-2.3.0.20250418.531.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-mct_nightly-2.3.0.20250416.541.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-model_compression_toolkit/__init__.py,sha256=AeZ2o5FMPLxX0sepHjLsV8WP2kgUvZWHt78DlPDh7u8,1557
+mct_nightly-2.3.0.20250418.531.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+model_compression_toolkit/__init__.py,sha256=kz46wlIXHqUJ124-nGslxvPJ-ClTRO6XVJAKyFnXNrk,1557
 model_compression_toolkit/constants.py,sha256=2ltuH-gdaLZoZV4CPUgKjC3S9ojz2z4OTVdenyVEypU,3912
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
@@ -13,7 +13,7 @@ model_compression_toolkit/core/runner.py,sha256=_r6cieb7Ur2BeHQK5XxTZHogjyA0utyb
 model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
 model_compression_toolkit/core/common/base_substitutions.py,sha256=xDFSmVVs_iFSZfajytI0cuQaNRNcwHX3uqOoHgVUvxQ,1666
 model_compression_toolkit/core/common/framework_implementation.py,sha256=L88uv_sfYM_56FSmxXP--emjv01_lk7IPqOI7QBZEt0,22939
-model_compression_toolkit/core/common/framework_info.py,sha256=RWeZfQOPiBroU2v4AeZoquVunNtZ4UORjOr2aRAPu8o,6279
+model_compression_toolkit/core/common/framework_info.py,sha256=5tderHT-7Cd21QrRFIJj3hH_gAcnlivOzwZ5m1ldJOs,6526
 model_compression_toolkit/core/common/memory_computation.py,sha256=ixoSpV5ZYZGyzhre3kQcvR2sNA8KBsPZ3lgbkDnw9Cs,1205
 model_compression_toolkit/core/common/model_builder_mode.py,sha256=jll9-59OPaE3ug7Y9-lLyV99_FoNHxkGZMgcm0Vkpss,1324
 model_compression_toolkit/core/common/model_collector.py,sha256=Tno3-qx9jmPZAZyLYgbPlMLHakVfuEH5deuToZNuCb0,13195
@@ -34,8 +34,8 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
 model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=W8qZejLwbm-lkvNF3GepNL3ypO10vFRxOxbq-o_rt_I,15479
 model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=F0AaAUBpJ9JjHMB5H2LD9pdwTSWJK-Kqm9dQmGHX1Jo,7368
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
-model_compression_toolkit/core/common/graph/base_graph.py,sha256=3OhaMHW01okwFY4mSy0ERFCJk8AZPDs8bCKAmjvmJEI,41893
-model_compression_toolkit/core/common/graph/base_node.py,sha256=Yl6GdjnP_Rt9w1lQUm00CJI0JUAffQF7wr6mur_YfbA,34124
+model_compression_toolkit/core/common/graph/base_graph.py,sha256=2aRpL8OP-JWKc2XFdsAQjACthJZmS8zgwIX-wjBRCFQ,41383
+model_compression_toolkit/core/common/graph/base_node.py,sha256=AbUadAT581zelVcGcK9_--6CAGiht9qwkeWahwT3RzE,33389
 model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
 model_compression_toolkit/core/common/graph/functional_node.py,sha256=GH5wStmw8SoAj5IdT_-ItN1Meo_P5NUTt_5bgJC4fak,3935
 model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
@@ -67,18 +67,18 @@ model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_uti
 model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=6pLUEEIqRTVIlCYQC4JIvY55KAvuBHEX8uTOQ-1Ac4Q,3859
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=2Pp4hiYvGW2I9YhloDxQNT0sZRg3TDp9CXObloF8IFU,4971
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=GGrp7QngrWvWtPN8cQnL4IEbNwcVRc-hAUqfnxjjMmk,5998
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=NBzzhkVI407S9cIiw7t7nsP3MrkOdSnweKQdPBXb8to,38180
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=-hOMBucYn12ePyLd0b1KxniPOIRu4b53SwEzv0bWToI,4943
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=d5-3j2e_rdcQOT7c4s0p7640i3nSetjJ6MgMhhMM7dc,6152
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=a0lyySRmQ1vKikx5YvDMA4l1Eha-W5BCPYScvDlL_6c,37300
 model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=4bkM8pYKvk18cxHbx973Dz6qWrNT0MRm44cuk__qVaI,27297
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
-model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=fk7PWiZ6Na5O_Z_dymk_UfDCTqW_X_4EROU7DZknQnc,9444
+model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=S1ChgxtUjzXJufNWyRbKoNdyNC6fGUjPeComDMx8ZCo,9479
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=PKkhc5q8pEPnNLXwo3U56EOCfYnPXIvPs0LlCGZOoKU,4426
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=cjFnpDvxZDE4K2sgt26DhosA2XqhxHDs0eW5Qe7AwAQ,40668
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=QQwtl08DiDxUOQGpYPnek_RlZjWm1Ky7tL2ESHXMK78,4050
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=TaK5NqVdmygsHw9_x5JsJ-BPvlbKA9cRyTno1R8gbnU,7269
+model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=32s620FyREMBJYx3AUp6umlRfHxjqhL31PRbVtLdMJ4,6664
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -106,7 +106,7 @@ model_compression_toolkit/core/common/quantization/candidate_node_quantization_c
 model_compression_toolkit/core/common/quantization/core_config.py,sha256=yxCzWqldcHoe8GGxrH0tp99bhrc5jDT7SgZftnMUUBE,2374
 model_compression_toolkit/core/common/quantization/debug_config.py,sha256=uH45Uq3Tp9FIyMynex_WY2_y-Kv8LuPw2XXZydnpW5A,1649
 model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=n2A8pO7_DMMae4o69U0I00iW6mzeRlRfKHDxlQUBBuI,7204
-model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=gL3XUm85FBLvtF60jmWkPxITOBw7cs66scNtC7QHW-M,29471
+model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=0OJZtQuv-StbKZOpalvGi9lcpHJNRPeuclevSaCPggc,29792
 model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=UkSVW7d1OF_Px9gAjsqqK65aYhIBFWaBO-_IH6_AFfg,4403
 model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=HfBkSiRTOf9mNF-TNQHTCCs3xSg66F20no0O6vl5v1Y,2154
 model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=7eG7dl1TcbdnHwgmvyjarxLs0o6Lw_9VAjXAm4rsiBk,3791
@@ -157,7 +157,7 @@ model_compression_toolkit/core/keras/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7V
 model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiOcnLAq1v0MMBCJqMJzzFk,3225
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
 model_compression_toolkit/core/keras/data_util.py,sha256=jm54o-SlI1DJ-sEvRuX9OyLN68tEt0VxcqrdIjR98Ag,8366
-model_compression_toolkit/core/keras/default_framework_info.py,sha256=IGEHKH3IcmpRfyHuEBJTpEXu2-TDFfqQzpm8kHuj8QY,4974
+model_compression_toolkit/core/keras/default_framework_info.py,sha256=DvK1Tr6z3cQlJw1nx62iFaeSsQSXJl55xOIcJ1uNGu8,5020
 model_compression_toolkit/core/keras/keras_implementation.py,sha256=_15BrSGTRSSp_8ayuo2x-hdKanew1xuIPSumP46IGSA,32545
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
 model_compression_toolkit/core/keras/keras_node_prior_info.py,sha256=HUmzEXDQ8LGX7uOYSRiLZ2TNbYxLX9J9IeAa6QYlifg,3927
@@ -168,7 +168,7 @@ model_compression_toolkit/core/keras/back2framework/factory_model_builder.py,sha
 model_compression_toolkit/core/keras/back2framework/float_model_builder.py,sha256=9SFHhX-JnkB8PvYIIHRYlReBDI_RkZY9LditzW_ElLk,2444
 model_compression_toolkit/core/keras/back2framework/instance_builder.py,sha256=fBj13c6zkVoWX4JJG18_uXPptiEJqXClE_zFbaFB6Q8,4517
 model_compression_toolkit/core/keras/back2framework/keras_model_builder.py,sha256=TY86-Mb8hmo8RgCcQvkSYIthYOqV9e4VIMpqIyouJ4Y,17397
-model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py,sha256=emsaCYyZBF7oQfXAR0edU7idiMInXLXRuGPcrUp4slM,15301
+model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py,sha256=BTDJB6VUAyVapzkwnftdXkv9RaQfwp_GIEk1FyovdGg,14813
 model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py,sha256=5wFb4nx_F0Wu4c8pLf6n6OzxOHtpOJ6_3mQsNSXIudU,2481
 model_compression_toolkit/core/keras/graph_substitutions/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
@@ -222,7 +222,7 @@ model_compression_toolkit/core/keras/visualization/__init__.py,sha256=mjbqLD-KcG
 model_compression_toolkit/core/pytorch/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/pytorch/constants.py,sha256=Sg0hkUaMe88mI2_pd3KqhVz5ORnA46S1uq9Tj5qhtHc,2828
 model_compression_toolkit/core/pytorch/data_util.py,sha256=YYbT135HhlTt0q6XdD2JX7AS_L92f_uV2rWq2hsJOCA,6325
-model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=NLdmiig5a2EBxutJeDHjp8px4g_2EKt3zmntmK-NrT4,4309
+model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-byHTXmQEuOiqTAX45BHGi3mRRBF4_EfJ3XhpmVilSU,4355
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
 model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=c_QFo4e7t6b21CDakGhjVpqy5aXFxxqkdJ-s54HEOfs,31207
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
@@ -232,7 +232,7 @@ model_compression_toolkit/core/pytorch/back2framework/__init__.py,sha256=H_WixgN
 model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py,sha256=bwppTPRs6gL96nm7qPiKrNcBj4Krr0yEsOWjRF0aXmQ,2339
 model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py,sha256=tLrlUyYhxVKVjkad1ZAtbRra0HedB3iVfIkZ_dYnQ-4,3419
 model_compression_toolkit/core/pytorch/back2framework/instance_builder.py,sha256=BBHBfTqeWm7L3iDyPBpk0jxvj-rBg1QWI23imkjfIl0,1467
-model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py,sha256=D7lU1r9Uq_7fdNuKk2BMF8ho5GrsY-8gyGN6yYoHaVg,15060
+model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py,sha256=K4L8FzJFM8_Ge2MHYkSqzCtoZe-ejEhVq8C1RgecyOc,14531
 model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py,sha256=WccaNiHK12IIimYu29E1oJkQHUdhPCBcIRutefTQ3Ag,19903
 model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py,sha256=qZNNOlNTTV4ZKPG3q5GDXkIVTPUEr8dvxAS_YiMORmg,3456
 model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
@@ -528,7 +528,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.3.0.20250416.541.dist-info/METADATA,sha256=r1uKB8w4EULCSj-_wL_b-doM7GuOlu4NeTVo11pYUj0,25413
-mct_nightly-2.3.0.20250416.541.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-mct_nightly-2.3.0.20250416.541.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.3.0.20250416.541.dist-info/RECORD,,
+mct_nightly-2.3.0.20250418.531.dist-info/METADATA,sha256=l29V43qlD_uYRJsyqWxSE8HcNNbvTHVxWrr5DEvvVFw,25413
+mct_nightly-2.3.0.20250418.531.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+mct_nightly-2.3.0.20250418.531.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.3.0.20250418.531.dist-info/RECORD,,

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.3.0.20250416.000541"
+__version__ = "2.3.0.20250418.000531"

model_compression_toolkit/core/common/framework_info.py CHANGED Viewed

@@ -22,6 +22,12 @@ from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.defaultdict import DefaultDict
+# Default value to use for ops without kernel.
+# This is a weird default, but it's used all over the place, so for now only extract it to const so that it can be
+# referenced by variable instead of hard-coded.
+DEFAULT_KERNEL_ATTRIBUTES = [None]
 class ChannelAxis(Enum):
     """

model_compression_toolkit/core/common/graph/base_graph.py CHANGED Viewed

@@ -16,7 +16,7 @@ from collections import namedtuple
 from copy import copy, deepcopy
 from functools import wraps
-from typing import List, Tuple, Any, Callable
+from typing import List, Tuple, Any, Callable, Dict
 import networkx as nx
 import numpy as np
@@ -684,7 +684,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
                 sorted_configurable_nodes.append(n)
         return sorted_configurable_nodes
-    def get_min_candidates_config(self, fw_info: FrameworkInfo) -> List[int]:
+    def get_min_candidates_config(self, fw_info: FrameworkInfo) -> Dict[BaseNode, int]:
         """
         Builds a minimal configuration.
         Note: we assume that a minimal configuration exists, i.e., each configurable node has exactly one candidate
@@ -694,18 +694,13 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         Args:
             fw_info: fw_info: FrameworkInfo object with information about the specific framework's model.
-        Returns: A list of candidate for each node (list on indices)
+        Returns:
+            A dict from layer to an index of its minimal candidate.
         """
         conf_sorted_nodes = self.get_configurable_sorted_nodes(fw_info)
-        min_cfg_candidates = [n.find_min_candidates_indices() for n in conf_sorted_nodes]  # list of lists of indices
-        assert all([len(lst) == 1 for lst in min_cfg_candidates]), \
-            f"A minimal config candidate must be defined, but some node have multiple potential minimal candidates"
-        return [lst[0] for lst in min_cfg_candidates]
+        return {n: n.find_min_candidate_index() for n in conf_sorted_nodes}
-    def get_max_candidates_config(self, fw_info: FrameworkInfo) -> List[int]:
+    def get_max_candidates_config(self, fw_info: FrameworkInfo) -> Dict[BaseNode, int]:
         """
         Builds a maximal configuration.
         Note: we assume that a maximal configuration exists, i.e., each configurable node has exactly one candidate
@@ -715,16 +710,11 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         Args:
             fw_info: fw_info: FrameworkInfo object with information about the specific framework's model.
-        Returns: A list of candidate for each node (list on indices)
+        Returns:
+            A dict from layer to an index of its maximal candidate.
         """
         conf_sorted_nodes = self.get_configurable_sorted_nodes(fw_info)
-        max_cfg_candidates = [n.find_max_candidates_indices() for n in conf_sorted_nodes]  # list of lists of indices
-        assert all([len(lst) == 1 for lst in max_cfg_candidates]), \
-            f"A maximal config candidate must be defined, but some node have multiple potential maximal candidates"
-        return [lst[0] for lst in max_cfg_candidates]
+        return {n: n.find_max_candidate_index() for n in conf_sorted_nodes}
     def get_final_weights_config(self, fw_info: FrameworkInfo) -> List[Tuple[BaseNode, int]]:
         """

model_compression_toolkit/core/common/graph/base_node.py CHANGED Viewed

@@ -484,49 +484,35 @@ class BaseNode:
         # for scalar shape (None,) prod returns 1
         return sum([np.prod([x for x in output_shape if x is not None]) for output_shape in output_shapes])
-    def find_min_candidates_indices(self) -> List[int]:
+    def find_min_candidate_index(self) -> int:
         """
-        Returns a list with potential minimal candidates.
-        A potential minimal candidate is a candidate which its weights_n_bits and activation_n_bits pair is
-        on the Pareto Front, i.e., there is no other candidate that its n_bits pair exceeds in both entries.
-        Returns: A list of indices of potential minimal candidates.
-        """
-        # We assume that the candidates are sorted according to weights_n_bits first and activation_n_bits second
-        # First, we add the last candidate to the set of minimal candidates (candidate, index)
-        first_min = (len(self.candidates_quantization_cfg) - 1,
-                     self.candidates_quantization_cfg[-1].activation_quantization_cfg.activation_n_bits)
-        min_candidates = [first_min]
-        # Iterate over all other candidates, and add ones with higher weights_n_bits but smaller activation_n_bits
-        for i, c in reversed(list(enumerate(self.candidates_quantization_cfg))):
-            if c.activation_quantization_cfg.activation_n_bits < first_min[1]:
-                min_candidates.append((i, c))
-        return [i for i, a_n_bits in min_candidates]
-    def find_max_candidates_indices(self) -> List[int]:
+        Returns:
+            The index of the minimal bit-width candidate.
         """
-        Returns a list with potential maximal candidates.
-        A potential maximal candidate is a candidate which its weights_n_bits and activation_n_bits pair is
-        on the Pareto Front, i.e., there is no other candidates that its n_bits pair is lower in both entries.
+        aw_nbits = [(c.activation_quantization_cfg.activation_n_bits,
+                     *[v.weights_n_bits for v in c.weights_quantization_cfg.get_all_weight_attrs_configs().values()])
+                    for c in self.candidates_quantization_cfg]
+        min_nbits = min(aw_nbits)
+        min_ind = [i for i, nb in enumerate(aw_nbits) if min_nbits == nb]
+        # check that no other candidate has a lower nbit for any weight
+        if len(min_ind) > 1 or any(nb[i] < min_nbits[i] for i in range(len(min_nbits)) for nb in aw_nbits):
+            raise ValueError('Expected exactly one candidate with min activation and min weights.')
+        return min_ind[0]
-        Returns: A list of indices of potential maximal candidates.
+    def find_max_candidate_index(self) -> int:
         """
-        # We assume that the candidates are sorted according to weights_n_bits first and activation_n_bits second
-        # First, we add the first candidate to the set of maximal candidates (candidate, index)
-        first_max = (0, self.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits)
-        max_candidates = [first_max]
-        # Iterate over all other candidates, and add ones with higher weights_n_bits but smaller activation_n_bits
-        for i, c in enumerate(self.candidates_quantization_cfg):
-            if c.activation_quantization_cfg.activation_n_bits > first_max[1]:
-                max_candidates.append((i, c))
-        return [i for i, a_n_bits in max_candidates]
+        Returns:
+            The index of the maximal bit-width candidate.
+        """
+        aw_nbits = [(c.activation_quantization_cfg.activation_n_bits,
+                     *[v.weights_n_bits for v in c.weights_quantization_cfg.get_all_weight_attrs_configs().values()])
+                    for c in self.candidates_quantization_cfg]
+        max_nbits = max(aw_nbits)
+        max_ind = [i for i, nb in enumerate(aw_nbits) if max_nbits == nb]
+        # check that no other candidate has a higher nbit for any weight
+        if len(max_ind) > 1 or any(nb[i] > max_nbits[i] for i in range(len(max_nbits)) for nb in aw_nbits):
+            raise ValueError('Expected exactly one candidate with max activation and max weights.')
+        return max_ind[0]
     def get_unique_weights_candidates(self, attr: str) -> List[Any]:
         """

model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py CHANGED Viewed

@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Set, Dict, Tuple
+from typing import Set, Dict, Tuple
 import numpy as np
 from model_compression_toolkit.core import FrameworkInfo
-from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     RUTarget
@@ -36,7 +36,7 @@ class MixedPrecisionRUHelper:
         self.fw_impl = fw_impl
         self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
-    def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: List[int]) -> Dict[RUTarget, np.ndarray]:
+    def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Dict[BaseNode, int]) -> Dict[RUTarget, np.ndarray]:
         """
         Compute utilization of requested targets for a specific configuration:
           for weights and bops - total utilization,
@@ -74,7 +74,7 @@ class MixedPrecisionRUHelper:
                                                  f'Requested {ru_targets}')
         return ru_dict
-    def get_quantization_candidates(self, mp_cfg) \
+    def get_quantization_candidates(self, mp_cfg: Dict[BaseNode, int]) \
             -> Tuple[Dict[str, NodeActivationQuantizationConfig], Dict[str, NodeWeightsQuantizationConfig]]:
         """
         Retrieve quantization candidates objects for weights and activations from the configuration list.
@@ -86,8 +86,7 @@ class MixedPrecisionRUHelper:
             A mapping between nodes to weights quantization config, and a mapping between nodes and activation
             quantization config.
         """
-        mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
-        node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
+        node_qcs = {n: n.candidates_quantization_cfg[candidate_idx] for n, candidate_idx in mp_cfg.items()}
         act_qcs = {n.name: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
         w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
         return act_qcs, w_qcs

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py CHANGED Viewed

@@ -14,10 +14,10 @@
 # ==============================================================================
 from enum import Enum
-from typing import List, Callable
+from typing import List, Callable, Dict
 from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
-from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.hessian import HessianInfoService
@@ -100,11 +100,13 @@ def search_bit_width(graph: Graph,
                                                  fw_impl,
                                                  se,
                                                  target_resource_utilization)
-    result_bit_cfg = search_manager.search()
+    nodes_bit_cfg = search_manager.search()
     graph.skip_validation_check = False
     if mp_config.refine_mp_solution:
-        result_bit_cfg = greedy_solution_refinement_procedure(result_bit_cfg, search_manager, target_resource_utilization)
+        nodes_bit_cfg = greedy_solution_refinement_procedure(nodes_bit_cfg, search_manager, target_resource_utilization)
-    return result_bit_cfg
+    topo_bit_cfg = [nodes_bit_cfg[n] for n in graph.get_configurable_sorted_nodes(fw_info)]
+    assert len(topo_bit_cfg) == len(nodes_bit_cfg)
+    return topo_bit_cfg

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py CHANGED Viewed

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import itertools
 import copy
 from collections import defaultdict
@@ -21,7 +23,6 @@ from typing import Dict, List, Tuple
 import numpy as np
-from model_compression_toolkit.constants import EPS
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
@@ -75,34 +76,44 @@ class MixedPrecisionSearchManager:
         self.target_resource_utilization = target_resource_utilization
         self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info)
-        self.layer_to_bitwidth_mapping = self.get_search_space()
         self.ru_targets = target_resource_utilization.get_restricted_targets()
         self.ru_helper = MixedPrecisionRUHelper(self.mp_graph, fw_info, fw_impl)
-        self.min_ru_config = self.mp_graph.get_min_candidates_config(fw_info)
-        self.max_ru_config = self.mp_graph.get_max_candidates_config(fw_info)
+        self.min_ru_config: Dict[BaseNode, int] = self.mp_graph.get_min_candidates_config(fw_info)
+        self.max_ru_config: Dict[BaseNode, int] = self.mp_graph.get_max_candidates_config(fw_info)
         self.min_ru = self.ru_helper.compute_utilization(self.ru_targets, self.min_ru_config)
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph,
                                                                        original_graph=self.original_graph)
-    def search(self) -> List[int]:
+    def search(self) -> Dict[BaseNode, int]:
         """
         Run mixed precision search.
         Returns:
-            Indices of the selected bit-widths candidates.
+            Mapping from nodes to indices of the selected bit-widths candidate.
         """
-        candidates_sensitivity = self._build_sensitivity_mapping()
-        candidates_ru = self._compute_relative_ru_matrices()
-        rel_target_ru = self._get_relative_ru_constraint_per_mem_element()
-        solver = MixedPrecisionIntegerLPSolver(candidates_sensitivity, candidates_ru, rel_target_ru)
-        config = solver.run()
+        mp_config = self._prepare_and_run_solver()
         if self.using_virtual_graph:
-            config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
-        return config
+            mp_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(mp_config)
+        return mp_config
+    def _prepare_and_run_solver(self) -> Dict[BaseNode, int]:
+        """
+        Prepare sensitivity and ru data for LP solver and run the solver.
+        Returns:
+            Mapping from nodes to indices of the selected bit-widths candidate.
+        """
+        layers_candidates_sensitivity: Dict[BaseNode, List[float]] = self._build_sensitivity_mapping()
+        candidates_ru = self._compute_relative_ru_matrices()
+        rel_target_ru = self._get_relative_ru_constraint_per_mem_element()
+        solver = MixedPrecisionIntegerLPSolver(layers_candidates_sensitivity, candidates_ru, rel_target_ru)
+        mp_config = solver.run()
+        return mp_config
     def _get_relative_ru_constraint_per_mem_element(self) -> Dict[RUTarget, np.ndarray]:
         """
@@ -119,7 +130,7 @@ class MixedPrecisionSearchManager:
         """
         target_ru = self.target_resource_utilization.get_resource_utilization_dict(restricted_only=True)
         rel_target_ru = {
-            ru_target: ru - self.min_ru[ru_target] for ru_target, ru in target_ru.items()
+            ru_target: (ru - self.min_ru[ru_target]) for ru_target, ru in target_ru.items()
         }
         unsatisfiable_targets = {
             ru_target.value: target_ru[ru_target] for ru_target, ru in rel_target_ru.items() if any(ru < 0)
@@ -129,28 +140,31 @@ class MixedPrecisionSearchManager:
                              f"following targets: {unsatisfiable_targets}")
         return rel_target_ru
-    def _build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]:
+    def _build_sensitivity_mapping(self, eps: float = 1e-6) -> Dict[BaseNode, List[float]]:
         """
         This function measures the sensitivity of a change in a bitwidth of a layer on the entire model.
-        It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity.
-        For each node and some possible node's bitwidth (according to the given search space), we use
-        the framework function compute_metric_fn in order to infer
-        a batch of images, and compute (using the inference results) the sensitivity metric of
-        the configured mixed-precision model.
         Args:
-            eps: Epsilon value to manually increase metric value (if necessary) for numerical stability
+            eps: if sensitivity for a non-max candidate is lower than for a max candidate, we set it to
+              sensitivity of a max candidate + epsilon.
         Returns:
-            Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to
-            the sensitivity of the model.
+            Mapping from nodes to their bitwidth candidates sensitivity.
         """
         Logger.info('Starting to evaluate metrics')
-        layer_to_metrics_mapping = {}
-        compute_metric = self.sensitivity_evaluator.compute_metric
+        orig_sorted_nodes = self.original_graph.get_configurable_sorted_nodes(self.fw_info)
+        def topo_cfg(cfg: dict) -> list:
+            topo_cfg = [cfg[n] for n in orig_sorted_nodes]
+            assert len(topo_cfg) == len(cfg)
+            return topo_cfg
+        def compute_metric(cfg, node_idx=None, baseline_cfg=None):
+            return self.sensitivity_evaluator.compute_metric(topo_cfg(cfg),
+                                                             node_idx,
+                                                             topo_cfg(baseline_cfg) if baseline_cfg else None)
         if self.using_virtual_graph:
             origin_max_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
                 self.max_ru_config)
@@ -158,19 +172,17 @@ class MixedPrecisionSearchManager:
         else:
             max_config_value = compute_metric(self.max_ru_config)
-        for node_idx, layer_possible_bitwidths_indices in tqdm(self.layer_to_bitwidth_mapping.items(),
-                                                               total=len(self.layer_to_bitwidth_mapping)):
-            layer_to_metrics_mapping[node_idx] = {}
-            for bitwidth_idx in layer_possible_bitwidths_indices:
-                if self.max_ru_config[node_idx] == bitwidth_idx:
+        layer_to_metrics_mapping = defaultdict(list)
+        for node_idx, node in tqdm(enumerate(self.mp_topo_configurable_nodes)):
+            for bitwidth_idx, _ in enumerate(node.candidates_quantization_cfg):
+                if self.max_ru_config[node] == bitwidth_idx:
                     # This is a computation of the metric for the max configuration, assign pre-calculated value
-                    layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value
+                    layer_to_metrics_mapping[node].append(max_config_value)
                     continue
                 # Create a configuration that differs at one layer only from the baseline model
                 mp_model_configuration = self.max_ru_config.copy()
-                mp_model_configuration[node_idx] = bitwidth_idx
+                mp_model_configuration[node] = bitwidth_idx
                 # Build a distance matrix using the function we got from the framework implementation.
                 if self.using_virtual_graph:
@@ -180,8 +192,8 @@ class MixedPrecisionSearchManager:
                             mp_model_configuration,
                             changed_virtual_nodes_idx=[node_idx],
                             original_base_config=origin_max_config)
-                    origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if
-                                                    c != origin_mp_model_configuration[i]]
+                    origin_changed_nodes_indices = [i for i, (n, c) in enumerate(origin_max_config.items()) if
+                                                    c != origin_mp_model_configuration[n]]
                     metric_value = compute_metric(
                         origin_mp_model_configuration,
                         origin_changed_nodes_indices,
@@ -191,11 +203,11 @@ class MixedPrecisionSearchManager:
                         mp_model_configuration,
                         [node_idx],
                         self.max_ru_config)
-                layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps)
+                metric_value = max(metric_value, max_config_value + eps)
+                layer_to_metrics_mapping[node].append(metric_value)
         # Finalize distance metric mapping
-        self.finalize_distance_metric(layer_to_metrics_mapping)
+        self._finalize_distance_metric(layer_to_metrics_mapping)
         return layer_to_metrics_mapping
@@ -221,22 +233,6 @@ class MixedPrecisionSearchManager:
         return graph, False
-    def get_search_space(self) -> Dict[int, List[int]]:
-        """
-        The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces
-        for the node).
-        Returns:
-            The entire search space of the graph.
-        """
-        indices_mapping = {}
-        for idx, n in enumerate(self.mp_topo_configurable_nodes):
-            # For each node, get all possible bitwidth indices for it
-            # (which is a list from 0 to the length of the candidates mp_config list of the node).
-            indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg)))  # all search_methods space
-        return indices_mapping
     def _compute_relative_ru_matrices(self) -> Dict[RUTarget, np.ndarray]:
         """
         Computes and builds a resource utilization matrix for all restricted targets, to be used for the
@@ -248,55 +244,41 @@ class MixedPrecisionSearchManager:
             per ru target. Num memory elements depends on the target, e.g. num cuts or 1 for cumulative metrics.
         """
         rus_per_candidate = defaultdict(list)
-        for c, c_n in enumerate(self.mp_topo_configurable_nodes):
-            for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
-                if candidate_idx == self.min_ru_config[c]:
+        for node in self.mp_topo_configurable_nodes:
+            for candidate_idx, _ in enumerate(node.candidates_quantization_cfg):
+                if candidate_idx == self.min_ru_config[node]:
                     candidate_rus = self.min_ru
                 else:
-                    candidate_rus = self.compute_ru_for_candidate(c, candidate_idx)
+                    cfg = self.min_ru_config.copy()
+                    cfg[node] = candidate_idx
+                    candidate_rus = self.ru_helper.compute_utilization(self.ru_targets, cfg)
                 for target, ru in candidate_rus.items():
                     rus_per_candidate[target].append(ru)
         # Each target contains a matrix of num configurations X num elements
-        relative_rus = {target: np.array(ru) - self.min_ru[target] for target, ru in rus_per_candidate.items()}
+        relative_rus = {target: (np.array(ru) - self.min_ru[target]) for target, ru in rus_per_candidate.items()}
         return relative_rus
-    def compute_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int) -> Dict[RUTarget, np.ndarray]:
-        """
-        Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal
-        target configuration with the given candidate index.
-        Args:
-            conf_node_idx: The index of a node in a sorted configurable nodes list.
-            candidate_idx: Quantization config candidate to be used for the node's resource utilization computation.
-        Returns:
-            Node's resource utilization vector.
-        """
-        cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        return self.ru_helper.compute_utilization(self.ru_targets, cfg)
     @staticmethod
-    def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
+    def copy_config_with_replacement(mp_cfg: Dict[BaseNode, int], node: BaseNode, candidate_idx: int) -> Dict[BaseNode, int]:
         """
-        Replacing the quantization configuration candidate in a given mixed-precision configuration at the given
-        index (node's index) with the given value (candidate index).
+        Create a copy of the given mixed-precision configuration and update the candidate index for a specific node.
         Args:
-            mp_cfg: Mixed-precision configuration (list of candidates' indices)
-            idx: A configurable node's index.
-            value: A new candidate index to configure.
+            mp_cfg: Mixed-precision configuration.
+            node: Node to update the config for.
+            candidate_idx: A new candidate index to configure.
-        Returns: A new mixed-precision configuration.
+        Returns:
+            A new mixed-precision configuration.
         """
         updated_cfg = mp_cfg.copy()
-        updated_cfg[idx] = value
+        updated_cfg[node] = candidate_idx
         return updated_cfg
-    def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
+    def compute_resource_utilization_for_config(self, config: Dict[BaseNode, int]) -> ResourceUtilization:
         """
         Computes the resource utilization values for a given mixed-precision configuration.
@@ -313,7 +295,7 @@ class MixedPrecisionSearchManager:
             w_qcs=w_qcs, ru_targets=self.ru_targets, allow_unused_qcs=True)
         return ru
-    def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
+    def _finalize_distance_metric(self, layer_to_metrics_mapping: Dict[BaseNode, List[float]]):
         """
         Finalizing the distance metric building.
         The method checks to see if the maximal distance value is larger than a given threshold, and if so,
@@ -321,21 +303,20 @@ class MixedPrecisionSearchManager:
         Modification to the dictionary is done inplace.
         Args:
-            layer_to_metrics_mapping: A mapping between a node index to a mapping between
-            a bitwidth index to a distance value.
+            layer_to_metrics_mapping: A mapping between a node to a list of distance values per bitwidth candidate.
         """
         # normalize metric for numerical stability
+        max_dist = max(itertools.chain.from_iterable(layer_to_metrics_mapping.values()))
-        max_dist = max([max([d for b, d in dists.items()]) for layer, dists in layer_to_metrics_mapping.items()])
         if max_dist >= self.sensitivity_evaluator.quant_config.metric_normalization_threshold:
             Logger.warning(f"The mixed precision distance metric values indicate a large error in the quantized model."
                            f"this can cause numerical issues."
                            f"The program will proceed with mixed precision search after scaling the metric values,"
                            f"which can lead to unstable results.")
             for layer, dists in layer_to_metrics_mapping.items():
-                for b, d in dists.items():
-                    layer_to_metrics_mapping[layer][b] /= max_dist
+                for i, _ in enumerate(dists):
+                    layer_to_metrics_mapping[layer][i] /= max_dist
 class ConfigReconstructionHelper:
@@ -363,7 +344,8 @@ class ConfigReconstructionHelper:
         self.fw_info = original_graph.fw_info
         self.virtual_sorted_nodes_names = self.virtual_graph.get_configurable_sorted_nodes_names(self.fw_info)
-        self.origin_sorted_conf_nodes_names = self.original_graph.get_configurable_sorted_nodes_names(self.fw_info)
+        self.origin_sorted_conf_nodes = self.original_graph.get_configurable_sorted_nodes(self.fw_info)
+        self.origin_sorted_conf_nodes_names = [n.name for n in self.origin_sorted_conf_nodes]
         self.origin_node_idx_to_cfg = {}
@@ -375,9 +357,9 @@ class ConfigReconstructionHelper:
         self.origin_node_idx_to_cfg = {}
     def reconstruct_config_from_virtual_graph(self,
-                                              virtual_mp_cfg: List[int],
+                                              virtual_mp_cfg: Dict[BaseNode, int],
                                               changed_virtual_nodes_idx: List[int] = None,
-                                              original_base_config: List[int] = None) -> List[int]:
+                                              original_base_config: Dict[BaseNode, int] = None) -> Dict[BaseNode, int]:
         """
         Reconstructs the original config for a given virtual graph mixed-precision config.
         It iterates over all virtual configurable node (that has some chosen bit-width virtual candidate)
@@ -405,21 +387,21 @@ class ConfigReconstructionHelper:
                 [(idx, self.virtual_graph.get_configurable_sorted_nodes(self.fw_info)[idx]) for idx in changed_virtual_nodes_idx]
             # Iterating only over the virtual nodes that have updated config
             for virtual_node_idx, n in updated_virtual_nodes:
-                self.reconstruct_node_config(n, virtual_mp_cfg, virtual_node_idx)
+                self.reconstruct_node_config(n, list(virtual_mp_cfg.values()), virtual_node_idx)
             # Updating reconstructed config for all other nodes based on provided base_config
             original_sorted_conf_nodes = self.original_graph.get_configurable_sorted_nodes(self.fw_info)
-            for i in range(len(original_base_config)):
+            for i, (n, qc_ind) in enumerate(original_base_config.items()):
                 if i not in list(self.origin_node_idx_to_cfg.keys()):
-                    self.update_config_at_original_idx(n=original_sorted_conf_nodes[i],
-                                                       origin_cfg_idx=original_base_config[i])
+                    self.update_config_at_original_idx(n=n, origin_cfg_idx=qc_ind)
         else:
             # Reconstruct entire config
             for virtual_node_idx, n in enumerate(self.virtual_graph.get_configurable_sorted_nodes(self.fw_info)):
-                self.reconstruct_node_config(n, virtual_mp_cfg, virtual_node_idx)
+                self.reconstruct_node_config(n, list(virtual_mp_cfg.values()), virtual_node_idx)
         res_config = [self.origin_node_idx_to_cfg[key] for key in sorted(self.origin_node_idx_to_cfg.keys())]
         self._clear_reconstruction_dict()
-        return res_config
+        assert len(res_config) == len(self.origin_sorted_conf_nodes)
+        return {n: candidate_idx for n, candidate_idx in zip(self.origin_sorted_conf_nodes, res_config)}
     def reconstruct_node_config(self,
                                 n: BaseNode,

model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py CHANGED Viewed

@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from collections import defaultdict
 import numpy as np
 from pulp import *
-from typing import Dict, Tuple, List
+from typing import Dict, Tuple, Any
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
@@ -30,23 +32,23 @@ class MixedPrecisionIntegerLPSolver:
             candidates_ru: resource utilization per candidate.
             ru_constraints: resource utilization constraints corresponding to 'candidates_ru'.
     """
-    def __init__(self, layer_to_sensitivity_mapping: Dict[int, Dict[int, float]],
+    def __init__(self,
+                 layer_to_sensitivity_mapping: Dict[Any, List[float]],
                  candidates_ru: Dict[RUTarget, np.ndarray],
                  ru_constraints: Dict[RUTarget, np.ndarray]):
         self.layer_to_sensitivity_mapping = layer_to_sensitivity_mapping
         self.candidates_ru = candidates_ru
         self.ru_constraints = ru_constraints
-        self.layer_to_indicator_vars_mapping, self.layer_to_objective_vars_mapping = (
-            self._init_problem_vars(layer_to_sensitivity_mapping))
+        self.layer_to_indicator_vars, self.objective_vars = self._init_problem_vars(layer_to_sensitivity_mapping)
-    def run(self) -> List[int]:
+    def run(self) -> Dict[Any, int]:
         """
         Build and solve an ILP optimization problem.
         Returns:
-            The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node).
+            A dictionary from layer to the index of the selected bitwidth candidate.
         """
         # Add all equations and inequalities that define the problem.
         lp_problem = self._formalize_problem()
@@ -59,17 +61,14 @@ class MixedPrecisionIntegerLPSolver:
             raise RuntimeError(f'No solution was found for the LP problem, with status {lp_problem.status}')
         # Take the bitwidth index only if its corresponding indicator is one.
-        config = np.asarray(
-            [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for
-             nbits_to_indicator
-             in self.layer_to_indicator_vars_mapping.values()]
-        ).flatten()
-        return config.tolist()
+        mp_config = {
+            layer: [v.varValue for v in vars].index(1.) for layer, vars in self.layer_to_indicator_vars.items()
+        }
+        return mp_config
     @staticmethod
-    def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
-        Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]:
+    def _init_problem_vars(layer_to_metrics_mapping: Dict[Any, List[float]]) -> Tuple[Dict[Any, List[LpVariable]],
+                                                                                      List[LpVariable]]:
         """
         Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use,
         and a variable for each indicator for whether we use the former variable or not.
@@ -83,21 +82,18 @@ class MixedPrecisionIntegerLPSolver:
             and the second for indicators for each variable.
         """
-        layer_to_indicator_vars_mapping = dict()
-        layer_to_objective_vars_mapping = dict()
-        for layer, nbits_to_metric in layer_to_metrics_mapping.items():
-            layer_to_indicator_vars_mapping[layer] = dict()
+        layer_to_indicator_vars = defaultdict(list)
+        objective_vars = []
-            for nbits in nbits_to_metric.keys():
-                layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}",
-                                                                           lowBound=0,
-                                                                           upBound=1,
-                                                                           cat=LpInteger)
+        for layer_idx, (layer, bitwidth_metrics) in enumerate(layer_to_metrics_mapping.items()):
+            layer_to_indicator_vars[layer] = [
+                LpVariable(f"layer_{layer_idx}_{qc_idx}", lowBound=0, upBound=1, cat=LpInteger)
+                for qc_idx, _ in enumerate(bitwidth_metrics)
+            ]
-            layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0)
+            objective_vars.append(LpVariable(f"s_{layer_idx}", 0))
-        return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping
+        return layer_to_indicator_vars, objective_vars
     def _formalize_problem(self) -> LpProblem:
         """
@@ -108,18 +104,16 @@ class MixedPrecisionIntegerLPSolver:
         """
         lp_problem = LpProblem()  # minimization problem by default
-        lp_problem += lpSum([self.layer_to_objective_vars_mapping[layer] for layer in
-                             self.layer_to_sensitivity_mapping.keys()])  # Objective (minimize acc loss)
+        lp_problem += lpSum(self.objective_vars)
-        for layer in self.layer_to_sensitivity_mapping.keys():
+        for layer_sensitivity, layer_indicator_vars, obj_var in zip(self.layer_to_sensitivity_mapping.values(),
+                                                                    self.layer_to_indicator_vars.values(),
+                                                                    self.objective_vars):
             # Use every bitwidth for every layer with its indicator.
-            lp_problem += lpSum([indicator * self.layer_to_sensitivity_mapping[layer][nbits]
-                                 for nbits, indicator in self.layer_to_indicator_vars_mapping[layer].items()]) == \
-                          self.layer_to_objective_vars_mapping[layer]
+            lp_problem += lpSum(list(np.multiply(layer_indicator_vars, layer_sensitivity))) == obj_var
             # Constraint of only one indicator==1
-            lp_problem += lpSum(
-                [v for v in self.layer_to_indicator_vars_mapping[layer].values()]) == 1
+            lp_problem += lpSum(layer_indicator_vars) == 1
         # Bound the feasible solution space with the desired resource utilization values.
         self._add_ru_constraints(lp_problem=lp_problem)
@@ -134,10 +128,7 @@ class MixedPrecisionIntegerLPSolver:
         Args:
             lp_problem: An Lp problem object to add constraint to.
         """
-        indicators = []
-        for layer in self.layer_to_sensitivity_mapping:
-            indicators.extend(list(self.layer_to_indicator_vars_mapping[layer].values()))
-        indicators_vec = np.array(indicators)
+        indicator_vars = list(itertools.chain(*self.layer_to_indicator_vars.values()))
         for target, ru_matrix in self.candidates_ru.items():
             # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall
@@ -146,7 +137,7 @@ class MixedPrecisionIntegerLPSolver:
             if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
                 assert ru_matrix.shape[1] == 1
-            indicated_ru_matrix = ru_matrix.T * indicators_vec
+            indicated_ru_matrix = ru_matrix.T * np.array(indicator_vars)
             # build lp sum term over all candidates
             ru_vec = indicated_ru_matrix.sum(axis=1)

model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py CHANGED Viewed

@@ -16,6 +16,7 @@
 from typing import List, Tuple, Dict
 from model_compression_toolkit.core import ResourceUtilization
+from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \
     MixedPrecisionSearchManager
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
@@ -23,9 +24,9 @@ from model_compression_toolkit.core.common.quantization.candidate_node_quantizat
 from model_compression_toolkit.logger import Logger
-def greedy_solution_refinement_procedure(mp_solution: List[int],
+def greedy_solution_refinement_procedure(mp_solution: Dict[BaseNode, int],
                                          search_manager: MixedPrecisionSearchManager,
-                                         target_resource_utilization: ResourceUtilization) -> List[int]:
+                                         target_resource_utilization: ResourceUtilization) -> Dict[BaseNode, int]:
     """
     A greedy procedure to try and improve a mixed-precision solution that was found by a mixed-precision optimization
     algorithm.
@@ -50,6 +51,8 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
         Logger.info(f'Target resource utilization constraint BOPs - Skipping MP greedy solution refinement')
         return mp_solution
+    assert search_manager.using_virtual_graph is False
     new_solution = mp_solution.copy()
     changed = True
@@ -58,17 +61,16 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
         nodes_ru = {}
         nodes_next_candidate = {}
-        for node_idx in range(len(mp_solution)):
-            if new_solution[node_idx] == 0:
+        for node in search_manager.mp_topo_configurable_nodes:
+            if new_solution[node] == 0:
                 # layer has max config in the given solution, nothing to optimize
                 continue
-            current_node = search_manager.mp_topo_configurable_nodes[node_idx]
-            node_candidates = current_node.candidates_quantization_cfg
+            node_candidates = node.candidates_quantization_cfg
             # only weights kernel attribute is quantized with weights mixed precision
             valid_candidates = _get_valid_candidates_indices(node_candidates,
-                                                             new_solution[node_idx],
+                                                             new_solution[node],
                                                              target_resource_utilization.activation_restricted(),
                                                              target_resource_utilization.weight_restricted()
                                                              )
@@ -77,7 +79,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
             updated_ru = []
             for valid_idx in valid_candidates:
                 node_updated_ru = search_manager.compute_resource_utilization_for_config(
-                    config=search_manager.replace_config_in_index(new_solution, node_idx, valid_idx))
+                    config=search_manager.copy_config_with_replacement(new_solution, node, valid_idx))
                 updated_ru.append(node_updated_ru)
             # filter out new configs that don't hold the resource utilization restrictions
@@ -88,8 +90,8 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
                 sorted_by_ru = sorted(node_filtered_ru, key=lambda node_ru: (node_ru[1].total_memory,
                                                                              node_ru[1].weights_memory,
                                                                              node_ru[1].activation_memory))
-                nodes_ru[node_idx] = sorted_by_ru[0][1]
-                nodes_next_candidate[node_idx] = sorted_by_ru[0][0]
+                nodes_ru[node] = sorted_by_ru[0][1]
+                nodes_next_candidate[node] = sorted_by_ru[0][0]
         if len(nodes_ru) > 0:
             # filter out new configs that don't hold the ru restrictions
@@ -102,7 +104,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
             new_solution[node_idx_to_upgrade] = nodes_next_candidate[node_idx_to_upgrade]
             changed = True
-    if any([mp_solution[i] != new_solution[i] for i in range(len(mp_solution))]):
+    if any([mp_solution[n] != new_solution[n] for n in mp_solution]):
         Logger.info(f'Greedy MP algorithm changed configuration from (numbers represent indices of the '
                     f'chosen bit-width candidate for each layer):\n{mp_solution}\nto\n{new_solution}')

model_compression_toolkit/core/common/quantization/node_quantization_config.py CHANGED Viewed

@@ -464,7 +464,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
                                                                                      weights_attr_cfg=attr_cfg,
                                                                                      weights_channels_axis=weights_channels_axis)
-    def get_attr_config(self, attr_name: Union[str, int]) -> WeightsAttrQuantizationConfig:
+    def get_attr_config(self, attr_name: 'WeightAttrT') -> WeightsAttrQuantizationConfig:
         """
         Returns a weights attribute config for an attribute that contains the given name.
         If multiple attributes that contain the given name are found - looking for the exact name, otherwise,
@@ -499,7 +499,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         return attr_cfg
-    def set_attr_config(self, attr_name: Union[str, int], attr_qc: WeightsAttrQuantizationConfig):
+    def set_attr_config(self, attr_name: 'WeightAttrT', attr_qc: WeightsAttrQuantizationConfig):
         """
         Adding a new attribute with quantization configuration to the node's weights configurations mapping.
@@ -513,7 +513,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         else:
             self.attributes_config_mapping[attr_name] = attr_qc
-    def has_attribute_config(self, attr_name: Union[str, int]) -> bool:
+    def has_attribute_config(self, attr_name: 'WeightAttrT') -> bool:
         """
         Checks whether the node weights configuration contains a configuration for a given weights attribute.
@@ -541,6 +541,14 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         """
         return list(self.pos_attributes_config_mapping.keys()) + list(self.attributes_config_mapping.keys())
+    def get_all_weight_attrs_configs(self) -> Dict['WeightAttrT', AttributeQuantizationConfig]:
+        """ Get quantization configs for all weights.
+            Returns:
+                A dict from weight attribute to its config.
+        """
+        return {attr: self.get_attr_config(attr) for attr in self.all_weight_attrs}
     def _extract_config_for_attributes_with_name(self, attr_name) -> Dict[str, WeightsAttrQuantizationConfig]:
         """
         Extract the saved attributes that contain the given attribute name.
@@ -560,7 +568,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
         return attrs_with_name
     def set_quant_config_attr(self, config_parameter_name: str, config_parameter_value: Any,
-                              attr_name: Union[str, int] = None, *args: List[Any], **kwargs: Dict[str, Any]):
+                              attr_name: 'WeightAttrT' = None, *args: List[Any], **kwargs: Dict[str, Any]):
         """
         This method overrides the parent class set_quant_config_attr to enable setting a specific weights
         attribute config parameter.

model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py CHANGED Viewed

@@ -137,11 +137,7 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
         float_weights = n.get_weights_by_keys(attr)
-        max_cfg_candidates = n.find_max_candidates_indices()
-        if not len(max_cfg_candidates) == 1:
-            Logger.critical(f"A maximal configuration candidate must be defined; found multiple potential maximal candidates.")# pragma: no cover
-        max_candidate_idx = max_cfg_candidates[0]
+        max_candidate_idx = n.find_max_candidate_index()
         return {'node_q_cfg': node_q_cfg_candidates,
                 'float_weights': float_weights,
@@ -178,11 +174,7 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
                 # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
                 n.sort_node_candidates(self.fw_info)
-                max_cfg_candidates = n.find_max_candidates_indices()
-                assert len(max_cfg_candidates) == 1, \
-                    f"A maximal config candidate must be defined, but some node have multiple potential maximal candidates"
-                max_candidate_idx = max_cfg_candidates[0]
+                max_candidate_idx = n.find_max_candidate_index()
                 kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
                 activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
                                                                             'max_candidate_idx': max_candidate_idx,

model_compression_toolkit/core/keras/default_framework_info.py CHANGED Viewed

@@ -25,7 +25,7 @@ else:
     from keras.layers import Conv2D, DepthwiseConv2D, Dense, Conv2DTranspose, Softmax, ELU  # pragma: no cover
 from model_compression_toolkit.defaultdict import DefaultDict
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo, DEFAULT_KERNEL_ATTRIBUTES
 from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import SOFTMAX_THRESHOLD
 from model_compression_toolkit.core.keras.constants import SOFTMAX, LINEAR, RELU, SWISH, SIGMOID, IDENTITY, TANH, SELU, \
@@ -39,7 +39,7 @@ If a layer that is not listed here is queried, [None] is returned.
 KERNEL_ATTRIBUTES = DefaultDict({Conv2D: [KERNEL],
                                  DepthwiseConv2D: [DEPTHWISE_KERNEL],
                                  Dense: [KERNEL],
-                                 Conv2DTranspose: [KERNEL]}, [None])
+                                 Conv2DTranspose: [KERNEL]}, DEFAULT_KERNEL_ATTRIBUTES)
 """

model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py CHANGED Viewed

@@ -136,11 +136,7 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
         float_weights = n.get_weights_by_keys(attr)
-        max_cfg_candidates = n.find_max_candidates_indices()
-        if not len(max_cfg_candidates) == 1:
-            Logger.critical(f"A maximal configuration candidate must be uniquely defined; however, multiple potential maximal candidates were found.") # pragma: no cover
-        max_candidate_idx = max_cfg_candidates[0]
+        max_candidate_idx = n.find_max_candidate_index()
         return {'node_q_cfg': node_q_cfg_candidates,
                 'float_weights': float_weights,
@@ -175,10 +171,7 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
                 # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
                 n.sort_node_candidates(self.fw_info)
-                max_cfg_candidates = n.find_max_candidates_indices()
-                assert len(max_cfg_candidates) == 1, \
-                    f"A maximal configuration candidate must be uniquely defined; however, multiple potential maximal candidates were found."
-                max_candidate_idx = max_cfg_candidates[0]
+                max_candidate_idx = n.find_max_candidate_index()
                 kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
                 activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,

model_compression_toolkit/core/pytorch/default_framework_info.py CHANGED Viewed

@@ -18,7 +18,7 @@ from torch.nn import Conv2d, ConvTranspose2d, Linear
 from torch import sigmoid
 from model_compression_toolkit.defaultdict import DefaultDict
-from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo, DEFAULT_KERNEL_ATTRIBUTES
 from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import SOFTMAX_THRESHOLD
 from model_compression_toolkit.core.pytorch.constants import KERNEL
@@ -33,7 +33,7 @@ If a layer that is not listed here is queried, [None] is returned.
 KERNEL_ATTRIBUTES = DefaultDict({Conv2d: [KERNEL],
                                  ConvTranspose2d: [KERNEL],
                                  Linear: [KERNEL]},
-                                [None])
+                                DEFAULT_KERNEL_ATTRIBUTES)
 """
 Map a layer to its kernel's output and input channels indices.

{mct_nightly-2.3.0.20250416.541.dist-info → mct_nightly-2.3.0.20250418.531.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-2.3.0.20250416.541.dist-info → mct_nightly-2.3.0.20250418.531.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-2.3.0.20250416.541.dist-info → mct_nightly-2.3.0.20250418.531.dist-info}/top_level.txt RENAMED Viewed

File without changes

mct-nightly 2.3.0.20250416.541__py3-none-any.whl → 2.3.0.20250418.531__py3-none-any.whl

mct-nightly 2.3.0.20250416.541__py3-none-any.whl → 2.3.0.20250418.531__py3-none-any.whl