mct-nightly 2.2.0.20241202.131715__py3-none-any.whl → 2.2.0.20241204.524__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/RECORD +18 -18
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/base_graph.py +9 -5
- model_compression_toolkit/core/common/graph/base_node.py +2 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +32 -35
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +9 -9
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +5 -11
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +12 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +11 -4
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +4 -6
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +6 -11
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +6 -9
- model_compression_toolkit/core/keras/keras_implementation.py +23 -27
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +2 -4
- {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-model_compression_toolkit/__init__.py,sha256=
+model_compression_toolkit/__init__.py,sha256=CwUJhq62PjrcRJgkwwmu5dArRV2bz7lgnxc2ebnm840,1573
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -32,8 +32,8 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
 model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=8seu9jBpC7HartP1nJd7S_SYFICyemVpDV9ZJ0QUQ7E,6212
 model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
 model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
-model_compression_toolkit/core/common/graph/base_graph.py,sha256=
-model_compression_toolkit/core/common/graph/base_node.py,sha256=
+model_compression_toolkit/core/common/graph/base_graph.py,sha256=GG13PAtndsMjIqINfrCN6llVkFrg5CBfij4z99ntieU,37815
+model_compression_toolkit/core/common/graph/base_node.py,sha256=mGiDcHnL5KybDYSiONSWtjrHOXI6tjjfACfjv2Piogc,31756
 model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
 model_compression_toolkit/core/common/graph/functional_node.py,sha256=GH5wStmw8SoAj5IdT_-ItN1Meo_P5NUTt_5bgJC4fak,3935
 model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
@@ -63,21 +63,21 @@ model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256
 model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,sha256=LLDguK7afsbN742ucLpmJr5TUfTyFpK1vbf2bpVr1v0,882
 model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=7dKMi5S0zQZ16m8NWn1XIuoXsKuZUg64G4-uK8-j1PQ,5177
 model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=nZb0_inMxgqlx5lG6uMF0sskHR-5zMSClzxYALBeqLA,4531
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=
-model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=B7xLl8P5eCz0_fBxocDlNiv6k-3MdfMUk2GjYKl2p5k,7522
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=hlaV4ybreT0DR4ftLSPg5KTit3BEm9dWA7Y8NHpEJ8w,37532
+model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=adjuvrJcan7Ua3nYlJX7T6qGkCRHGqWMaM5-099a9Us,27220
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=KifDMbm7qkSfvSl6pcZzQ82naIXzeKL6aT-VsvWZYyc,7901
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=MtPkZfPIJWI191Hbjp6JluUyLnqiJRi3zNf-CqVNuag,5053
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=sFuUgWwC0aEBpf9zWmCTIcAbykBj3t5vmWAoB_BjYWA,14979
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py,sha256=ttc8wPa_9LZansutQ2f1ss-RTzgTv739wy3qsdLzyyk,4217
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=mOxZwOQYnOwSJMiapEEH9o-89ujJdPxSl8zXpnApc0U,1850
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=WC1EHoNuo_lrzy4NRhGJ1cgmJ2IsFsbmP86mrVO3AVA,21506
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=QZlQtvAUXUNNc6H2mKEFZhQ-fjP1QCIsxsS5BrhaXvU,16547
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -157,7 +157,7 @@ model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiO
 model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
 model_compression_toolkit/core/keras/data_util.py,sha256=JdomIJZfep0QYPtx2jlg0xJ40cd9S_I7BakaWQi0wKw,2681
 model_compression_toolkit/core/keras/default_framework_info.py,sha256=PYcER89eEXjKtR0T7-2Y4f7cckqoD5OQbpHePoRkMec,5030
-model_compression_toolkit/core/keras/keras_implementation.py,sha256=
+model_compression_toolkit/core/keras/keras_implementation.py,sha256=HwbIR7x4t-TBNbWHVvVNFk8z-KFt6zM0LWAUXQuNZrk,31753
 model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
 model_compression_toolkit/core/keras/keras_node_prior_info.py,sha256=HUmzEXDQ8LGX7uOYSRiLZ2TNbYxLX9J9IeAa6QYlifg,3927
 model_compression_toolkit/core/keras/resource_utilization_data_facade.py,sha256=s56UIgiPipUQRNd2sd1xW6GFfYNMBmrocRCNtvpYLbY,4977
@@ -167,7 +167,7 @@ model_compression_toolkit/core/keras/back2framework/factory_model_builder.py,sha
 model_compression_toolkit/core/keras/back2framework/float_model_builder.py,sha256=9SFHhX-JnkB8PvYIIHRYlReBDI_RkZY9LditzW_ElLk,2444
 model_compression_toolkit/core/keras/back2framework/instance_builder.py,sha256=fBj13c6zkVoWX4JJG18_uXPptiEJqXClE_zFbaFB6Q8,4517
 model_compression_toolkit/core/keras/back2framework/keras_model_builder.py,sha256=ns3zFjngea7yZHrxqcV5FQCAfGcstho37D2S2KQZpwE,18444
-model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py,sha256=
+model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py,sha256=emsaCYyZBF7oQfXAR0edU7idiMInXLXRuGPcrUp4slM,15301
 model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py,sha256=5wFb4nx_F0Wu4c8pLf6n6OzxOHtpOJ6_3mQsNSXIudU,2481
 model_compression_toolkit/core/keras/graph_substitutions/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
 model_compression_toolkit/core/keras/graph_substitutions/substitutions/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
@@ -223,7 +223,7 @@ model_compression_toolkit/core/pytorch/constants.py,sha256=YwD_joIF0vK8UG2vW1NVv
 model_compression_toolkit/core/pytorch/data_util.py,sha256=YYbT135HhlTt0q6XdD2JX7AS_L92f_uV2rWq2hsJOCA,6325
 model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-Vls1P_8Ckm_18nnOsmQkZ71SmzHwtQLbQ383Z4Rb-U,4365
 model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
-model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=
+model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=Xwt7eHS-QJJc1fyOrxL2tz8E2CP-b2M0_R-Dgb1Gm-4,29558
 model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
 model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=xpKj99OZKT9NT0vKIl_cOe8d89d2gef1gKoNT6PFElE,4989
 model_compression_toolkit/core/pytorch/utils.py,sha256=7VbgcLwtQvdEEc_AJgSOQ3U3KRKCICFPaBirN1fIQxg,3940
@@ -559,8 +559,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
-mct_nightly-2.2.0.
+mct_nightly-2.2.0.20241204.524.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.2.0.20241204.524.dist-info/METADATA,sha256=O3ETKzNDjZGmSvp_WVmqIJz-jyk93WLG676QjyRsISs,26446
+mct_nightly-2.2.0.20241204.524.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+mct_nightly-2.2.0.20241204.524.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.2.0.20241204.524.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model

-__version__ = "2.2.0.
+__version__ = "2.2.0.20241204.000524"
model_compression_toolkit/core/common/graph/base_graph.py
CHANGED
@@ -542,9 +542,14 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         """
         # configurability is only relevant for kernel attribute quantization
         potential_conf_nodes = [n for n in list(self) if fw_info.is_kernel_op(n.type)]
-
-
-
+
+        def is_configurable(n):
+            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
+            return (n.is_weights_quantization_enabled(kernel_attr) and
+                    not n.is_all_weights_candidates_equal(kernel_attr) and
+                    (not n.reuse or include_reused_nodes))
+
+        return [n for n in potential_conf_nodes if is_configurable(n)]

     def get_sorted_weights_configurable_nodes(self,
                                               fw_info: FrameworkInfo,
@@ -571,8 +576,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         Returns:
             A list of nodes that their activation can be configured (namely, has one or more activation qc candidate).
         """
-        return list(
-            and not n.is_all_activation_candidates_equal(), list(self)))
+        return [n for n in list(self) if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal()]

     def get_sorted_activation_configurable_nodes(self) -> List[BaseNode]:
         """
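The rewrite above replaces the old filter/lambda chain with a named is_configurable predicate: a node is weights-configurable when its kernel attribute is quantized, its weight candidates are not all equal, and it is not a reused instance (unless include_reused_nodes is passed). A minimal usage sketch, illustrative only; the method name is taken from its call sites elsewhere in this diff, while the keyword and its default are assumptions based on the body above rather than the (elided) signature:

conf_nodes = graph.get_weights_configurable_nodes(fw_info)  # reused nodes filtered out (assumed default)
conf_nodes_incl_reused = graph.get_weights_configurable_nodes(fw_info, include_reused_nodes=True)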
model_compression_toolkit/core/common/graph/base_node.py
CHANGED
@@ -384,9 +384,8 @@ class BaseNode:
         """
         # note that if the given attribute name does not exist in the node's attributes mapping,
         # the inner method would log an exception.
-
-
-                   for attr_candidate in self.get_all_weights_attr_candidates(attr))
+        candidates = self.get_all_weights_attr_candidates(attr)
+        return all(candidate == candidates[0] for candidate in candidates[1:])

     def has_kernel_weight_to_quantize(self, fw_info):
         """
model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py
CHANGED
@@ -40,41 +40,38 @@ def filter_candidates_for_mixed_precision(graph: Graph,

     """

-
-
+    tru = target_resource_utilization
+    if tru.total_mem_restricted() or tru.bops_restricted():
+        return

-    if
-
-
-
-
-
-
-
-
-
-                             c.activation_quantization_cfg.activation_n_bits == base_cfg_nbits]
+    if tru.weight_restricted() and not tru.activation_restricted():
+        # Running mixed precision for weights compression only -
+        # filter out candidates activation only configurable node
+        weights_conf = graph.get_weights_configurable_nodes(fw_info)
+        activation_configurable_nodes = [n for n in graph.get_activation_configurable_nodes() if n not in weights_conf]
+        for n in activation_configurable_nodes:
+            base_cfg_nbits = n.get_qco(tpc).base_config.activation_n_bits
+            filtered_conf = [c for c in n.candidates_quantization_cfg if
+                             c.activation_quantization_cfg.enable_activation_quantization and
+                             c.activation_quantization_cfg.activation_n_bits == base_cfg_nbits]

-
-
-
-
+            if len(filtered_conf) != 1:
+                Logger.critical(f"Running weights only mixed precision failed on layer {n.name} with multiple "
+                                f"activation quantization configurations.") # pragma: no cover
+            n.candidates_quantization_cfg = filtered_conf

-    elif
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            Logger.critical(f"Running activation only mixed precision failed on layer {n.name} with multiple "
-                            f"weights quantization configurations.") # pragma: no cover
-            n.candidates_quantization_cfg = filtered_conf
+    elif tru.activation_restricted() and not tru.weight_restricted():
+        # Running mixed precision for activation compression only -
+        # filter out candidates weights only configurable node
+        activation_conf = graph.get_activation_configurable_nodes()
+        weight_configurable_nodes = [n for n in graph.get_weights_configurable_nodes(fw_info) if n not in activation_conf]
+        for n in weight_configurable_nodes:
+            kernel_attr = graph.fw_info.get_kernel_op_attributes(n.type)[0]
+            base_cfg_nbits = n.get_qco(tpc).base_config.attr_weights_configs_mapping[kernel_attr].weights_n_bits
+            filtered_conf = [c for c in n.candidates_quantization_cfg if
+                             c.weights_quantization_cfg.get_attr_config(kernel_attr).enable_weights_quantization and
+                             c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits == base_cfg_nbits]
+            if len(filtered_conf) != 1:
+                Logger.critical(f"Running activation only mixed precision failed on layer {n.name} with multiple "
+                                f"weights quantization configurations.") # pragma: no cover
+            n.candidates_quantization_cfg = filtered_conf
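Taken together, the rewritten filter reduces to three cases driven by the new ResourceUtilization helpers: return early when a total-memory or BOPS budget is involved, collapse activation candidates when only weights are budgeted, and collapse weight candidates when only activations are budgeted. A hypothetical call sketch; the parameter names come from the function body above, while the exact signature and constructor keywords are assumptions:

tru = ResourceUtilization(weights_memory=512_000)  # activation/total/bops budgets left at np.inf
filter_candidates_for_mixed_precision(graph=graph,
                                      target_resource_utilization=tru,
                                      fw_info=fw_info,
                                      tpc=tpc)
# Only the base activation configuration survives on activation-only configurable nodes.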
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
CHANGED
@@ -83,16 +83,17 @@ def search_bit_width(graph_to_search_cfg: Graph,

     # Set graph for MP search
     graph = copy.deepcopy(graph_to_search_cfg)  # Copy graph before searching
-    if target_resource_utilization.
+    if target_resource_utilization.bops_restricted():
         # Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search
         graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling())

     # If we only run weights compression with MP than no need to consider activation quantization when computing the
     # MP metric (it adds noise to the computation)
-
-
-
-
+    tru = target_resource_utilization
+    weight_only_restricted = tru.weight_restricted() and not (tru.activation_restricted() or
+                                                              tru.total_mem_restricted() or
+                                                              tru.bops_restricted())
+    disable_activation_for_metric = weight_only_restricted or graph_to_search_cfg.is_single_activation_cfg()

     # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
     # even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
@@ -117,11 +118,10 @@ def search_bit_width(graph_to_search_cfg: Graph,
                                             target_resource_utilization,
                                             original_graph=graph_to_search_cfg)

-    if search_method in search_methods:
-
-    else:
-        raise NotImplemented # pragma: no cover
+    if search_method not in search_methods:
+        raise NotImplementedError() # pragma: no cover

+    search_method_fn = search_methods[search_method]
     # Search for the desired mixed-precision configuration
     result_bit_cfg = search_method_fn(search_manager,
                                       target_resource_utilization)
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
CHANGED
@@ -211,14 +211,8 @@ class MixedPrecisionSearchManager:
        Returns: Node's resource utilization vector.

        """
-
-
-                                                         self.min_ru_config,
-                                                         conf_node_idx,
-                                                         candidate_idx),
-                                                     self.graph,
-                                                     self.fw_info,
-                                                     self.fw_impl)
+        cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
+        return self.compute_ru_functions[target].metric_fn(cfg, self.graph, self.fw_info, self.fw_impl)

    @staticmethod
    def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -253,7 +247,7 @@ class MixedPrecisionSearchManager:
            if target == RUTarget.BOPS:
                ru_vector = None
            else:
-                ru_vector = self.compute_ru_functions[target]
+                ru_vector = self.compute_ru_functions[target].metric_fn([], self.graph, self.fw_info, self.fw_impl)

            non_conf_ru_dict[target] = ru_vector

@@ -282,9 +276,9 @@ class MixedPrecisionSearchManager:
            configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl)
            non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
            if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
-                ru_ru = self.compute_ru_functions[ru_target]
+                ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
            else:
-                ru_ru = self.compute_ru_functions[ru_target]
+                ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(
                    np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)

            ru_dict[ru_target] = ru_ru[0]
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py
CHANGED
@@ -72,6 +72,18 @@ class ResourceUtilization:
               f"Total_memory: {self.total_memory}, " \
               f"BOPS: {self.bops}"

+    def weight_restricted(self):
+        return self.weights_memory < np.inf
+
+    def activation_restricted(self):
+        return self.activation_memory < np.inf
+
+    def total_mem_restricted(self):
+        return self.total_memory < np.inf
+
+    def bops_restricted(self):
+        return self.bops < np.inf
+
    def get_resource_utilization_dict(self) -> Dict[RUTarget, float]:
        """
        Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target.
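Each new helper is a plain comparison against np.inf, so a target counts as restricted exactly when the caller supplied a finite budget for it. A short illustration, assuming the constructor keywords mirror the attribute names above and default to np.inf:

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization

tru = ResourceUtilization(weights_memory=1024)  # only the weights budget is finite
assert tru.weight_restricted()                  # 1024 < np.inf
assert not tru.activation_restricted()          # unset -> np.inf -> unrestricted
assert not tru.total_mem_restricted()
assert not tru.bops_restricted()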
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py
CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from typing import NamedTuple
+
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
@@ -20,7 +22,12 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
 # When adding a RUTarget that we want to consider in our mp search,
 # a matching pair of resource_utilization_tools computation function and a resource_utilization_tools
 # aggregation function should be added to this dictionary
-
-
-
-
+class RuFunctions(NamedTuple):
+    metric_fn: MpRuMetric
+    aggregate_fn: MpRuAggregation
+
+
+ru_functions_mapping = {RUTarget.WEIGHTS: RuFunctions(MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
+                        RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
+                        RUTarget.TOTAL: RuFunctions(MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
+                        RUTarget.BOPS: RuFunctions(MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
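Switching the mapping values to a NamedTuple lets the call sites elsewhere in this diff use the named fields metric_fn and aggregate_fn instead of positional tuple indexing. A consumption sketch mirroring those call sites (the argument values are placeholders):

fns = ru_functions_mapping[RUTarget.WEIGHTS]
per_node_ru = fns.metric_fn(mp_cfg, graph, fw_info, fw_impl)  # per-node utilization vector
aggregated = fns.aggregate_fn(per_node_ru, False)             # SUM aggregation for the weights target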
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
CHANGED
@@ -221,9 +221,9 @@ def _add_set_of_ru_constraints(search_manager: MixedPrecisionSearchManager,
    # search_manager.compute_ru_functions contains a pair of ru_metric and ru_aggregation for each ru target
    # get aggregated ru, considering both configurable and non-configurable nodes
    if non_conf_ru_vector is None or len(non_conf_ru_vector) == 0:
-        aggr_ru = search_manager.compute_ru_functions[target]
+        aggr_ru = search_manager.compute_ru_functions[target].aggregate_fn(ru_sum_vector)
    else:
-        aggr_ru = search_manager.compute_ru_functions[target]
+        aggr_ru = search_manager.compute_ru_functions[target].aggregate_fn(np.concatenate([ru_sum_vector, non_conf_ru_vector]))

    for v in aggr_ru:
        if isinstance(v, float):
@@ -261,9 +261,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
    Logger.info('Starting to evaluate metrics')
    layer_to_metrics_mapping = {}

-
-
-    if is_bops_target_resource_utilization:
+    if target_resource_utilization.bops_restricted():
        origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config)
        max_config_value = search_manager.compute_metric_fn(origin_max_config)
    else:
@@ -284,7 +282,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
            mp_model_configuration[node_idx] = bitwidth_idx

            # Build a distance matrix using the function we got from the framework implementation.
-            if
+            if target_resource_utilization.bops_restricted():
                # Reconstructing original graph's configuration from virtual graph's configuration
                origin_mp_model_configuration = \
                    search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py
CHANGED
@@ -90,7 +90,7 @@ class SensitivityEvaluation:
                                                             quant_config.num_interest_points_factor)

        # We use normalized MSE when not running hessian-based. For Hessian-based normalized MSE is not needed
-        #
+        # because hessian weights already do normalization.
        use_normalized_mse = self.quant_config.use_hessian_based_scores is False
        self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points, use_normalized_mse)

@@ -116,14 +116,11 @@ class SensitivityEvaluation:
        # Build images batches for inference comparison
        self.images_batches = self._get_images_batches(quant_config.num_of_images)

-        # Get baseline model inference on all samples
-        self.baseline_tensors_list = []  # setting from outside scope
-
        # Casting images tensors to the framework tensor type.
-        self.images_batches =
+        self.images_batches = [self.fw_impl.to_tensor(img) for img in self.images_batches]

        # Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
-        self._init_baseline_tensors_list()
+        self.baseline_tensors_list = self._init_baseline_tensors_list()

        # Computing Hessian-based scores for weighted average distance metric computation (only if requested),
        # and assigning distance_weighting method accordingly.
@@ -193,11 +190,9 @@ class SensitivityEvaluation:

    def _init_baseline_tensors_list(self):
        """
-        Evaluates the baseline model on all images and
-        Initiates a class variable self.baseline_tensors_list
+        Evaluates the baseline model on all images and returns the obtained lists of tensors in a list for later use.
        """
-
-            images))
+        return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.baseline_model, images))
                for images in self.images_batches]

    def _build_models(self) -> Any:
@@ -454,7 +449,7 @@ def get_mp_interest_points(graph: Graph,

    """
    sorted_nodes = graph.get_topo_sorted_nodes()
-    ip_nodes =
+    ip_nodes = [n for n in sorted_nodes if interest_points_classifier(n)]

    interest_points_nodes = bound_num_interest_points(ip_nodes, num_ip_factor)

model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py
CHANGED
@@ -92,11 +92,8 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
        if kernel_attr is not None and n.is_weights_quantization_enabled(kernel_attr):
            weights_conf_nodes_names = [node.name for node in self.graph.get_weights_configurable_nodes(self.fw_info)]
            if n.name in weights_conf_nodes_names:
-
-
-                    kernel_attr: ConfigurableWeightsQuantizer(
-                        **self._get_weights_configurable_quantizer_kwargs(n,
-                                                                          kernel_attr))})
+                wq = ConfigurableWeightsQuantizer(**self._get_weights_configurable_quantizer_kwargs(n, kernel_attr))
+                return KerasQuantizationWrapper(layer, weights_quantizers={kernel_attr: wq})
            else:
                # TODO: Do we want to include other quantized attributes that are not
                # the kernel attribute in the mixed precision model?
@@ -106,12 +103,12 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
                if not len(node_weights_qc) == 1:
                    Logger.critical(f"Expected a unique weights configuration for node {n.name}, but found {len(node_weights_qc)} configurations.")# pragma: no cover

+                weights_quant_cfg = node_weights_qc[0].weights_quantization_cfg
+                weights_quant_method = weights_quant_cfg.get_attr_config(kernel_attr).weights_quantization_method
                quantier_for_node = get_inferable_quantizer_class(QuantizationTarget.Weights,
-
-                                                                  .get_attr_config(kernel_attr)
-                                                                  .weights_quantization_method,
+                                                                  weights_quant_method,
                                                                  BaseKerasInferableQuantizer)
-                kwargs = get_inferable_quantizer_kwargs(
+                kwargs = get_inferable_quantizer_kwargs(weights_quant_cfg,
                                                        QuantizationTarget.Weights,
                                                        kernel_attr)

model_compression_toolkit/core/keras/keras_implementation.py
CHANGED
@@ -438,17 +438,11 @@ class KerasImplementation(FrameworkImplementation):
            node: Node to indicate whether it needs to be part of the interest points set.
        Returns: True if the node should be considered an interest point, False otherwise.
        """
-
-        if node.is_match_type(Activation):
-            node_type_name = node.framework_attr[keras_constants.ACTIVATION]
-            if node_type_name in [keras_constants.SOFTMAX, keras_constants.SIGMOID]:
-                return True
-        elif any([node.is_match_type(_type) for _type in [tf.nn.softmax, tf.keras.layers.Softmax, tf.nn.sigmoid, Conv2D,
-                                                          DepthwiseConv2D, Conv2DTranspose, Dense, Concatenate, tf.concat,
-                                                          Add, tf.add]]):
+        if self.is_softmax(node) or self.is_sigmoid(node):
            return True

-        return
+        return any([node.is_match_type(_type) for _type in [Conv2D, DepthwiseConv2D, Conv2DTranspose, Dense,
+                                                            Concatenate, tf.concat, Add, tf.add]])

    def get_mp_node_distance_fn(self, n: BaseNode,
                                compute_distance_fn: Callable = None,
@@ -466,32 +460,34 @@ class KerasImplementation(FrameworkImplementation):
        Returns: A distance function between two tensors and a axis on which the distance is computed (if exists).
        """

-        axis = n.framework_attr.get(keras_constants.AXIS)
-            if not isinstance(n, FunctionalNode) else n.op_call_kwargs.get(keras_constants.AXIS)
-
-        layer_class = n.layer_class
-        framework_attrs = n.framework_attr
+        axis = n.op_call_kwargs.get(keras_constants.AXIS) if isinstance(n, FunctionalNode) else n.framework_attr.get(keras_constants.AXIS)

        if compute_distance_fn is not None:
            return compute_distance_fn, axis

-        if
-
-            if node_type_name == SOFTMAX and axis is not None:
-                return compute_kl_divergence, axis
-            elif node_type_name == SIGMOID:
-                return compute_cs, axis
-        elif axis is not None and (layer_class == tf.nn.softmax or layer_class == tf.keras.layers.Softmax
-                                   or (layer_class == TFOpLambda and
-                                       SOFTMAX in framework_attrs[keras_constants.FUNCTION])):
+        # TODO should we really return mse if axis is None? Error? Fill default?
+        if self.is_softmax(n) and axis is not None:
            return compute_kl_divergence, axis
-
-
-            return compute_cs, axis
-        elif layer_class == Dense:
+
+        if self.is_sigmoid(n) or n.layer_class == Dense:
            return compute_cs, axis
+
        return partial(compute_mse, norm=norm_mse), axis

+    @staticmethod
+    def is_sigmoid(node: BaseNode):
+        cls = node.layer_class
+        return ((cls == Activation and node.framework_attr[ACTIVATION] == SIGMOID) or
+                cls == tf.nn.sigmoid or
+                cls == TFOpLambda and SIGMOID in node.framework_attr[keras_constants.FUNCTION])
+
+    @staticmethod
+    def is_softmax(node: BaseNode):
+        cls = node.layer_class
+        return ((cls == Activation and node.framework_attr[ACTIVATION] == SOFTMAX) or
+                cls in [tf.nn.softmax, tf.keras.layers.Softmax] or
+                cls == TFOpLambda and SOFTMAX in node.framework_attr[keras_constants.FUNCTION])
+
    def get_hessian_scores_calculator(self,
                                      graph: Graph,
                                      input_images: List[Any],
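The two new static helpers centralize the softmax/sigmoid matching that was previously split between the interest-point check and the distance-function selection. A toy check using a minimal stand-in for BaseNode; it assumes the keras constants ACTIVATION/SOFTMAX resolve to the usual 'activation'/'softmax' strings:

import tensorflow as tf
from tensorflow.keras.layers import Activation
from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation

class _StubNode:
    # Minimal stand-in exposing only the two attributes the helpers read.
    def __init__(self, layer_class, framework_attr):
        self.layer_class = layer_class
        self.framework_attr = framework_attr

assert KerasImplementation.is_softmax(_StubNode(Activation, {'activation': 'softmax'}))
assert KerasImplementation.is_softmax(_StubNode(tf.keras.layers.Softmax, {}))
assert not KerasImplementation.is_sigmoid(_StubNode(tf.keras.layers.Softmax, {}))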
model_compression_toolkit/core/pytorch/pytorch_implementation.py
CHANGED
@@ -427,10 +427,8 @@ class PytorchImplementation(FrameworkImplementation):
        Returns: True if the node should be considered an interest point, False otherwise.
        """

-
-
-            return True
-        return False
+        return any(node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
+                                                           softmax, operator.add, add, cat, operator.concat])

    def get_mp_node_distance_fn(self, n: BaseNode,
                                compute_distance_fn: Callable = None,
{mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/LICENSE.md
RENAMED
File without changes

{mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/WHEEL
RENAMED
File without changes

{mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/top_level.txt
RENAMED
File without changes