mct-nightly 2.2.0.20241202.131715__py3-none-any.whl → 2.2.0.20241204.524__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18)
  1. {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/RECORD +18 -18
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/common/graph/base_graph.py +9 -5
  5. model_compression_toolkit/core/common/graph/base_node.py +2 -3
  6. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +32 -35
  7. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +9 -9
  8. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +5 -11
  9. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +12 -0
  10. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +11 -4
  11. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +4 -6
  12. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +6 -11
  13. model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +6 -9
  14. model_compression_toolkit/core/keras/keras_implementation.py +23 -27
  15. model_compression_toolkit/core/pytorch/pytorch_implementation.py +2 -4
  16. {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/LICENSE.md +0 -0
  17. {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/WHEEL +0 -0
  18. {mct_nightly-2.2.0.20241202.131715.dist-info → mct_nightly-2.2.0.20241204.524.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mct-nightly
3
- Version: 2.2.0.20241202.131715
3
+ Version: 2.2.0.20241204.524
4
4
  Summary: A Model Compression Toolkit for neural networks
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN
@@ -1,4 +1,4 @@
1
- model_compression_toolkit/__init__.py,sha256=Y0S0cGZR8YyEXFmrpzm6m5TafU10MMyI_bHB3477yZE,1573
1
+ model_compression_toolkit/__init__.py,sha256=CwUJhq62PjrcRJgkwwmu5dArRV2bz7lgnxc2ebnm840,1573
2
2
  model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
3
3
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
4
4
  model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -32,8 +32,8 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
32
32
  model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=8seu9jBpC7HartP1nJd7S_SYFICyemVpDV9ZJ0QUQ7E,6212
33
33
  model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
34
34
  model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
35
- model_compression_toolkit/core/common/graph/base_graph.py,sha256=lg5QaBkRbmvM3tGZ0Q34S3m0CbFql3LUv5BaXLe5TG8,37824
36
- model_compression_toolkit/core/common/graph/base_node.py,sha256=UygLaWvpd_-rXVn-QdPORHRaHCywaemokgbhy9MK52E,31837
35
+ model_compression_toolkit/core/common/graph/base_graph.py,sha256=GG13PAtndsMjIqINfrCN6llVkFrg5CBfij4z99ntieU,37815
36
+ model_compression_toolkit/core/common/graph/base_node.py,sha256=mGiDcHnL5KybDYSiONSWtjrHOXI6tjjfACfjv2Piogc,31756
37
37
  model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
38
38
  model_compression_toolkit/core/common/graph/functional_node.py,sha256=GH5wStmw8SoAj5IdT_-ItN1Meo_P5NUTt_5bgJC4fak,3935
39
39
  model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
@@ -63,21 +63,21 @@ model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256
63
63
  model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,sha256=LLDguK7afsbN742ucLpmJr5TUfTyFpK1vbf2bpVr1v0,882
64
64
  model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=7dKMi5S0zQZ16m8NWn1XIuoXsKuZUg64G4-uK8-j1PQ,5177
65
65
  model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
66
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=klmaMQDeFc3IxRLf6YX4Dw1opFksbLyN10yFHdKAtLo,4875
66
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=nZb0_inMxgqlx5lG6uMF0sskHR-5zMSClzxYALBeqLA,4531
67
67
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
68
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=pk8HRoShDhiUprBC4m1AFQv1SacS4hOrj0MRdbq-5gY,7556
69
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=TTTux4YiOnQqt-2h7Y38959XaDwNZc0eufLMx_yws5U,37578
70
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=CYp2VuxXf95lYivolIuVRjAyaY5dFsDn2qh8ZhTmc9A,27525
68
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=B7xLl8P5eCz0_fBxocDlNiv6k-3MdfMUk2GjYKl2p5k,7522
69
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=hlaV4ybreT0DR4ftLSPg5KTit3BEm9dWA7Y8NHpEJ8w,37532
70
+ model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=adjuvrJcan7Ua3nYlJX7T6qGkCRHGqWMaM5-099a9Us,27220
71
71
  model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
72
72
  model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=KifDMbm7qkSfvSl6pcZzQ82naIXzeKL6aT-VsvWZYyc,7901
73
73
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
74
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=HILF7CIn-GYPvPmTFyvjWLhuLDwSGwdBcAaKFgVYrwk,4745
74
+ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=MtPkZfPIJWI191Hbjp6JluUyLnqiJRi3zNf-CqVNuag,5053
75
75
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=sFuUgWwC0aEBpf9zWmCTIcAbykBj3t5vmWAoB_BjYWA,14979
76
76
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py,sha256=ttc8wPa_9LZansutQ2f1ss-RTzgTv739wy3qsdLzyyk,4217
77
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=QhuqaECEGLnYC08iD6-2XXcU7NXbPzYf1sQcjYlGak8,1682
77
+ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py,sha256=mOxZwOQYnOwSJMiapEEH9o-89ujJdPxSl8zXpnApc0U,1850
78
78
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py,sha256=WC1EHoNuo_lrzy4NRhGJ1cgmJ2IsFsbmP86mrVO3AVA,21506
79
79
  model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
80
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=7ceagUJVk3GgLrD8HnmXMgZTRLjEvZW_RnzDR3ahNDM,16592
80
+ model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=QZlQtvAUXUNNc6H2mKEFZhQ-fjP1QCIsxsS5BrhaXvU,16547
81
81
  model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
82
82
  model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
83
83
  model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -157,7 +157,7 @@ model_compression_toolkit/core/keras/constants.py,sha256=dh4elQWt6Q6NYRht5k5RiiO
157
157
  model_compression_toolkit/core/keras/custom_layer_validation.py,sha256=f-b14wuiIgitBe7d0MmofYhDCTO3IhwJgwrh-Hq_t_U,1192
158
158
  model_compression_toolkit/core/keras/data_util.py,sha256=JdomIJZfep0QYPtx2jlg0xJ40cd9S_I7BakaWQi0wKw,2681
159
159
  model_compression_toolkit/core/keras/default_framework_info.py,sha256=PYcER89eEXjKtR0T7-2Y4f7cckqoD5OQbpHePoRkMec,5030
160
- model_compression_toolkit/core/keras/keras_implementation.py,sha256=Hi8seiFJdFqgYGGC003Y4879JQ7rmVZe8YiJ76T7FDE,32133
160
+ model_compression_toolkit/core/keras/keras_implementation.py,sha256=HwbIR7x4t-TBNbWHVvVNFk8z-KFt6zM0LWAUXQuNZrk,31753
161
161
  model_compression_toolkit/core/keras/keras_model_validation.py,sha256=1wNV2clFdC9BzIELRLSO2uKf0xqjLqlkTJudwtCeaJk,1722
162
162
  model_compression_toolkit/core/keras/keras_node_prior_info.py,sha256=HUmzEXDQ8LGX7uOYSRiLZ2TNbYxLX9J9IeAa6QYlifg,3927
163
163
  model_compression_toolkit/core/keras/resource_utilization_data_facade.py,sha256=s56UIgiPipUQRNd2sd1xW6GFfYNMBmrocRCNtvpYLbY,4977
@@ -167,7 +167,7 @@ model_compression_toolkit/core/keras/back2framework/factory_model_builder.py,sha
167
167
  model_compression_toolkit/core/keras/back2framework/float_model_builder.py,sha256=9SFHhX-JnkB8PvYIIHRYlReBDI_RkZY9LditzW_ElLk,2444
168
168
  model_compression_toolkit/core/keras/back2framework/instance_builder.py,sha256=fBj13c6zkVoWX4JJG18_uXPptiEJqXClE_zFbaFB6Q8,4517
169
169
  model_compression_toolkit/core/keras/back2framework/keras_model_builder.py,sha256=ns3zFjngea7yZHrxqcV5FQCAfGcstho37D2S2KQZpwE,18444
170
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py,sha256=ygIS1WIiftF1VC3oGhc8N6j7MryKtWgEg8nr50p7f4U,15587
170
+ model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py,sha256=emsaCYyZBF7oQfXAR0edU7idiMInXLXRuGPcrUp4slM,15301
171
171
  model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py,sha256=5wFb4nx_F0Wu4c8pLf6n6OzxOHtpOJ6_3mQsNSXIudU,2481
172
172
  model_compression_toolkit/core/keras/graph_substitutions/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
173
173
  model_compression_toolkit/core/keras/graph_substitutions/substitutions/__init__.py,sha256=mjbqLD-KcG3eNeCYpu1GBS7VclGVOQ63x2p6mAAuba4,698
@@ -223,7 +223,7 @@ model_compression_toolkit/core/pytorch/constants.py,sha256=YwD_joIF0vK8UG2vW1NVv
223
223
  model_compression_toolkit/core/pytorch/data_util.py,sha256=YYbT135HhlTt0q6XdD2JX7AS_L92f_uV2rWq2hsJOCA,6325
224
224
  model_compression_toolkit/core/pytorch/default_framework_info.py,sha256=-Vls1P_8Ckm_18nnOsmQkZ71SmzHwtQLbQ383Z4Rb-U,4365
225
225
  model_compression_toolkit/core/pytorch/pytorch_device_config.py,sha256=S25cuw10AW3SEN_fRAGRcG_I3wdvvQx1ehSJzPnn-UI,4404
226
- model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=SgxmSdzAQOPI9YHt4Q9-OeDi8fzAdgASHQ4nZ5maPsg,29599
226
+ model_compression_toolkit/core/pytorch/pytorch_implementation.py,sha256=Xwt7eHS-QJJc1fyOrxL2tz8E2CP-b2M0_R-Dgb1Gm-4,29558
227
227
  model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py,sha256=2LDQ7qupglHQ7o1Am7LWdfYVacfQnl-aW2N6l9det1w,3264
228
228
  model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py,sha256=xpKj99OZKT9NT0vKIl_cOe8d89d2gef1gKoNT6PFElE,4989
229
229
  model_compression_toolkit/core/pytorch/utils.py,sha256=7VbgcLwtQvdEEc_AJgSOQ3U3KRKCICFPaBirN1fIQxg,3940
@@ -559,8 +559,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
559
559
  model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
560
560
  model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
561
561
  model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
562
- mct_nightly-2.2.0.20241202.131715.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
563
- mct_nightly-2.2.0.20241202.131715.dist-info/METADATA,sha256=YvbvKkI18wDQ20u0NKp5GtD5gZWfoWMYEXcYbMbNNeg,26449
564
- mct_nightly-2.2.0.20241202.131715.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
565
- mct_nightly-2.2.0.20241202.131715.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
566
- mct_nightly-2.2.0.20241202.131715.dist-info/RECORD,,
562
+ mct_nightly-2.2.0.20241204.524.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
563
+ mct_nightly-2.2.0.20241204.524.dist-info/METADATA,sha256=O3ETKzNDjZGmSvp_WVmqIJz-jyk93WLG676QjyRsISs,26446
564
+ mct_nightly-2.2.0.20241204.524.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
565
+ mct_nightly-2.2.0.20241204.524.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
566
+ mct_nightly-2.2.0.20241204.524.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
27
27
  from model_compression_toolkit import pruning
28
28
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
29
29
 
30
- __version__ = "2.2.0.20241202.131715"
30
+ __version__ = "2.2.0.20241204.000524"
@@ -542,9 +542,14 @@ class Graph(nx.MultiDiGraph, GraphSearches):
542
542
  """
543
543
  # configurability is only relevant for kernel attribute quantization
544
544
  potential_conf_nodes = [n for n in list(self) if fw_info.is_kernel_op(n.type)]
545
- return list(filter(lambda n: n.is_weights_quantization_enabled(fw_info.get_kernel_op_attributes(n.type)[0])
546
- and not n.is_all_weights_candidates_equal(fw_info.get_kernel_op_attributes(n.type)[0])
547
- and (not n.reuse or include_reused_nodes), potential_conf_nodes))
545
+
546
+ def is_configurable(n):
547
+ kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
548
+ return (n.is_weights_quantization_enabled(kernel_attr) and
549
+ not n.is_all_weights_candidates_equal(kernel_attr) and
550
+ (not n.reuse or include_reused_nodes))
551
+
552
+ return [n for n in potential_conf_nodes if is_configurable(n)]
548
553
 
549
554
  def get_sorted_weights_configurable_nodes(self,
550
555
  fw_info: FrameworkInfo,
@@ -571,8 +576,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
571
576
  Returns:
572
577
  A list of nodes that their activation can be configured (namely, has one or more activation qc candidate).
573
578
  """
574
- return list(filter(lambda n: n.is_activation_quantization_enabled()
575
- and not n.is_all_activation_candidates_equal(), list(self)))
579
+ return [n for n in list(self) if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal()]
576
580
 
577
581
  def get_sorted_activation_configurable_nodes(self) -> List[BaseNode]:
578
582
  """
@@ -384,9 +384,8 @@ class BaseNode:
384
384
  """
385
385
  # note that if the given attribute name does not exist in the node's attributes mapping,
386
386
  # the inner method would log an exception.
387
- return all(attr_candidate ==
388
- self.candidates_quantization_cfg[0].weights_quantization_cfg.get_attr_config(attr)
389
- for attr_candidate in self.get_all_weights_attr_candidates(attr))
387
+ candidates = self.get_all_weights_attr_candidates(attr)
388
+ return all(candidate == candidates[0] for candidate in candidates[1:])
390
389
 
391
390
  def has_kernel_weight_to_quantize(self, fw_info):
392
391
  """
@@ -40,41 +40,38 @@ def filter_candidates_for_mixed_precision(graph: Graph,
40
40
 
41
41
  """
42
42
 
43
- no_total_restrictions = (target_resource_utilization.total_memory == np.inf and
44
- target_resource_utilization.bops == np.inf)
43
+ tru = target_resource_utilization
44
+ if tru.total_mem_restricted() or tru.bops_restricted():
45
+ return
45
46
 
46
- if target_resource_utilization.weights_memory < np.inf:
47
- if target_resource_utilization.activation_memory == np.inf and no_total_restrictions:
48
- # Running mixed precision for weights compression only -
49
- # filter out candidates activation only configurable node
50
- weights_conf = graph.get_weights_configurable_nodes(fw_info)
51
- for n in graph.get_activation_configurable_nodes():
52
- if n not in weights_conf:
53
- base_cfg_nbits = n.get_qco(tpc).base_config.activation_n_bits
54
- filtered_conf = [c for c in n.candidates_quantization_cfg if
55
- c.activation_quantization_cfg.enable_activation_quantization and
56
- c.activation_quantization_cfg.activation_n_bits == base_cfg_nbits]
47
+ if tru.weight_restricted() and not tru.activation_restricted():
48
+ # Running mixed precision for weights compression only -
49
+ # filter out candidates activation only configurable node
50
+ weights_conf = graph.get_weights_configurable_nodes(fw_info)
51
+ activation_configurable_nodes = [n for n in graph.get_activation_configurable_nodes() if n not in weights_conf]
52
+ for n in activation_configurable_nodes:
53
+ base_cfg_nbits = n.get_qco(tpc).base_config.activation_n_bits
54
+ filtered_conf = [c for c in n.candidates_quantization_cfg if
55
+ c.activation_quantization_cfg.enable_activation_quantization and
56
+ c.activation_quantization_cfg.activation_n_bits == base_cfg_nbits]
57
57
 
58
- if len(filtered_conf) != 1:
59
- Logger.critical(f"Running weights only mixed precision failed on layer {n.name} with multiple "
60
- f"activation quantization configurations.") # pragma: no cover
61
- n.candidates_quantization_cfg = filtered_conf
58
+ if len(filtered_conf) != 1:
59
+ Logger.critical(f"Running weights only mixed precision failed on layer {n.name} with multiple "
60
+ f"activation quantization configurations.") # pragma: no cover
61
+ n.candidates_quantization_cfg = filtered_conf
62
62
 
63
- elif target_resource_utilization.activation_memory < np.inf:
64
- if target_resource_utilization.weights_memory == np.inf and no_total_restrictions:
65
- # Running mixed precision for activation compression only -
66
- # filter out candidates weights only configurable node
67
- activation_conf = graph.get_activation_configurable_nodes()
68
- for n in graph.get_weights_configurable_nodes(fw_info):
69
- if n not in activation_conf:
70
- kernel_attr = graph.fw_info.get_kernel_op_attributes(n.type)[0]
71
- base_cfg_nbits = n.get_qco(tpc).base_config.attr_weights_configs_mapping[kernel_attr].weights_n_bits
72
- filtered_conf = [c for c in n.candidates_quantization_cfg if
73
- c.weights_quantization_cfg.get_attr_config(
74
- kernel_attr).enable_weights_quantization and
75
- c.weights_quantization_cfg.get_attr_config(
76
- kernel_attr).weights_n_bits == base_cfg_nbits]
77
- if len(filtered_conf) != 1:
78
- Logger.critical(f"Running activation only mixed precision failed on layer {n.name} with multiple "
79
- f"weights quantization configurations.") # pragma: no cover
80
- n.candidates_quantization_cfg = filtered_conf
63
+ elif tru.activation_restricted() and not tru.weight_restricted():
64
+ # Running mixed precision for activation compression only -
65
+ # filter out candidates weights only configurable node
66
+ activation_conf = graph.get_activation_configurable_nodes()
67
+ weight_configurable_nodes = [n for n in graph.get_weights_configurable_nodes(fw_info) if n not in activation_conf]
68
+ for n in weight_configurable_nodes:
69
+ kernel_attr = graph.fw_info.get_kernel_op_attributes(n.type)[0]
70
+ base_cfg_nbits = n.get_qco(tpc).base_config.attr_weights_configs_mapping[kernel_attr].weights_n_bits
71
+ filtered_conf = [c for c in n.candidates_quantization_cfg if
72
+ c.weights_quantization_cfg.get_attr_config(kernel_attr).enable_weights_quantization and
73
+ c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits == base_cfg_nbits]
74
+ if len(filtered_conf) != 1:
75
+ Logger.critical(f"Running activation only mixed precision failed on layer {n.name} with multiple "
76
+ f"weights quantization configurations.") # pragma: no cover
77
+ n.candidates_quantization_cfg = filtered_conf
@@ -83,16 +83,17 @@ def search_bit_width(graph_to_search_cfg: Graph,
83
83
 
84
84
  # Set graph for MP search
85
85
  graph = copy.deepcopy(graph_to_search_cfg) # Copy graph before searching
86
- if target_resource_utilization.bops < np.inf:
86
+ if target_resource_utilization.bops_restricted():
87
87
  # Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search
88
88
  graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling())
89
89
 
90
90
  # If we only run weights compression with MP than no need to consider activation quantization when computing the
91
91
  # MP metric (it adds noise to the computation)
92
- disable_activation_for_metric = (target_resource_utilization.weights_memory < np.inf and
93
- (target_resource_utilization.activation_memory == np.inf and
94
- target_resource_utilization.total_memory == np.inf and
95
- target_resource_utilization.bops == np.inf)) or graph_to_search_cfg.is_single_activation_cfg()
92
+ tru = target_resource_utilization
93
+ weight_only_restricted = tru.weight_restricted() and not (tru.activation_restricted() or
94
+ tru.total_mem_restricted() or
95
+ tru.bops_restricted())
96
+ disable_activation_for_metric = weight_only_restricted or graph_to_search_cfg.is_single_activation_cfg()
96
97
 
97
98
  # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
98
99
  # even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
@@ -117,11 +118,10 @@ def search_bit_width(graph_to_search_cfg: Graph,
117
118
  target_resource_utilization,
118
119
  original_graph=graph_to_search_cfg)
119
120
 
120
- if search_method in search_methods: # Get a specific search function
121
- search_method_fn = search_methods.get(search_method)
122
- else:
123
- raise NotImplemented # pragma: no cover
121
+ if search_method not in search_methods:
122
+ raise NotImplementedError() # pragma: no cover
124
123
 
124
+ search_method_fn = search_methods[search_method]
125
125
  # Search for the desired mixed-precision configuration
126
126
  result_bit_cfg = search_method_fn(search_manager,
127
127
  target_resource_utilization)
@@ -211,14 +211,8 @@ class MixedPrecisionSearchManager:
211
211
  Returns: Node's resource utilization vector.
212
212
 
213
213
  """
214
- return self.compute_ru_functions[target][0](
215
- self.replace_config_in_index(
216
- self.min_ru_config,
217
- conf_node_idx,
218
- candidate_idx),
219
- self.graph,
220
- self.fw_info,
221
- self.fw_impl)
214
+ cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
215
+ return self.compute_ru_functions[target].metric_fn(cfg, self.graph, self.fw_info, self.fw_impl)
222
216
 
223
217
  @staticmethod
224
218
  def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -253,7 +247,7 @@ class MixedPrecisionSearchManager:
253
247
  if target == RUTarget.BOPS:
254
248
  ru_vector = None
255
249
  else:
256
- ru_vector = self.compute_ru_functions[target][0]([], self.graph, self.fw_info, self.fw_impl)
250
+ ru_vector = self.compute_ru_functions[target].metric_fn([], self.graph, self.fw_info, self.fw_impl)
257
251
 
258
252
  non_conf_ru_dict[target] = ru_vector
259
253
 
@@ -282,9 +276,9 @@ class MixedPrecisionSearchManager:
282
276
  configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl)
283
277
  non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
284
278
  if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
285
- ru_ru = self.compute_ru_functions[ru_target][1](configurable_nodes_ru_vector, False)
279
+ ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(configurable_nodes_ru_vector, False)
286
280
  else:
287
- ru_ru = self.compute_ru_functions[ru_target][1](
281
+ ru_ru = self.compute_ru_functions[ru_target].aggregate_fn(
288
282
  np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)
289
283
 
290
284
  ru_dict[ru_target] = ru_ru[0]
@@ -72,6 +72,18 @@ class ResourceUtilization:
72
72
  f"Total_memory: {self.total_memory}, " \
73
73
  f"BOPS: {self.bops}"
74
74
 
75
+ def weight_restricted(self):
76
+ return self.weights_memory < np.inf
77
+
78
+ def activation_restricted(self):
79
+ return self.activation_memory < np.inf
80
+
81
+ def total_mem_restricted(self):
82
+ return self.total_memory < np.inf
83
+
84
+ def bops_restricted(self):
85
+ return self.bops < np.inf
86
+
75
87
  def get_resource_utilization_dict(self) -> Dict[RUTarget, float]:
76
88
  """
77
89
  Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target.
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
+ from typing import NamedTuple
16
+
15
17
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
16
18
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
17
19
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
@@ -20,7 +22,12 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
20
22
  # When adding a RUTarget that we want to consider in our mp search,
21
23
  # a matching pair of resource_utilization_tools computation function and a resource_utilization_tools
22
24
  # aggregation function should be added to this dictionary
23
- ru_functions_mapping = {RUTarget.WEIGHTS: (MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
24
- RUTarget.ACTIVATION: (MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
25
- RUTarget.TOTAL: (MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
26
- RUTarget.BOPS: (MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
25
+ class RuFunctions(NamedTuple):
26
+ metric_fn: MpRuMetric
27
+ aggregate_fn: MpRuAggregation
28
+
29
+
30
+ ru_functions_mapping = {RUTarget.WEIGHTS: RuFunctions(MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
31
+ RUTarget.ACTIVATION: RuFunctions(MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
32
+ RUTarget.TOTAL: RuFunctions(MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
33
+ RUTarget.BOPS: RuFunctions(MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
@@ -221,9 +221,9 @@ def _add_set_of_ru_constraints(search_manager: MixedPrecisionSearchManager,
221
221
  # search_manager.compute_ru_functions contains a pair of ru_metric and ru_aggregation for each ru target
222
222
  # get aggregated ru, considering both configurable and non-configurable nodes
223
223
  if non_conf_ru_vector is None or len(non_conf_ru_vector) == 0:
224
- aggr_ru = search_manager.compute_ru_functions[target][1](ru_sum_vector)
224
+ aggr_ru = search_manager.compute_ru_functions[target].aggregate_fn(ru_sum_vector)
225
225
  else:
226
- aggr_ru = search_manager.compute_ru_functions[target][1](np.concatenate([ru_sum_vector, non_conf_ru_vector]))
226
+ aggr_ru = search_manager.compute_ru_functions[target].aggregate_fn(np.concatenate([ru_sum_vector, non_conf_ru_vector]))
227
227
 
228
228
  for v in aggr_ru:
229
229
  if isinstance(v, float):
@@ -261,9 +261,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
261
261
  Logger.info('Starting to evaluate metrics')
262
262
  layer_to_metrics_mapping = {}
263
263
 
264
- is_bops_target_resource_utilization = target_resource_utilization.bops < np.inf
265
-
266
- if is_bops_target_resource_utilization:
264
+ if target_resource_utilization.bops_restricted():
267
265
  origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config)
268
266
  max_config_value = search_manager.compute_metric_fn(origin_max_config)
269
267
  else:
@@ -284,7 +282,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
284
282
  mp_model_configuration[node_idx] = bitwidth_idx
285
283
 
286
284
  # Build a distance matrix using the function we got from the framework implementation.
287
- if is_bops_target_resource_utilization:
285
+ if target_resource_utilization.bops_restricted():
288
286
  # Reconstructing original graph's configuration from virtual graph's configuration
289
287
  origin_mp_model_configuration = \
290
288
  search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
@@ -90,7 +90,7 @@ class SensitivityEvaluation:
90
90
  quant_config.num_interest_points_factor)
91
91
 
92
92
  # We use normalized MSE when not running hessian-based. For Hessian-based normalized MSE is not needed
93
- # beacause hessian weights already do normalization.
93
+ # because hessian weights already do normalization.
94
94
  use_normalized_mse = self.quant_config.use_hessian_based_scores is False
95
95
  self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points, use_normalized_mse)
96
96
 
@@ -116,14 +116,11 @@ class SensitivityEvaluation:
116
116
  # Build images batches for inference comparison
117
117
  self.images_batches = self._get_images_batches(quant_config.num_of_images)
118
118
 
119
- # Get baseline model inference on all samples
120
- self.baseline_tensors_list = [] # setting from outside scope
121
-
122
119
  # Casting images tensors to the framework tensor type.
123
- self.images_batches = list(map(lambda in_arr: self.fw_impl.to_tensor(in_arr), self.images_batches))
120
+ self.images_batches = [self.fw_impl.to_tensor(img) for img in self.images_batches]
124
121
 
125
122
  # Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
126
- self._init_baseline_tensors_list()
123
+ self.baseline_tensors_list = self._init_baseline_tensors_list()
127
124
 
128
125
  # Computing Hessian-based scores for weighted average distance metric computation (only if requested),
129
126
  # and assigning distance_weighting method accordingly.
@@ -193,11 +190,9 @@ class SensitivityEvaluation:
193
190
 
194
191
  def _init_baseline_tensors_list(self):
195
192
  """
196
- Evaluates the baseline model on all images and saves the obtained lists of tensors in a list for later use.
197
- Initiates a class variable self.baseline_tensors_list
193
+ Evaluates the baseline model on all images and returns the obtained lists of tensors in a list for later use.
198
194
  """
199
- self.baseline_tensors_list = [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.baseline_model,
200
- images))
195
+ return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.baseline_model, images))
201
196
  for images in self.images_batches]
202
197
 
203
198
  def _build_models(self) -> Any:
@@ -454,7 +449,7 @@ def get_mp_interest_points(graph: Graph,
454
449
 
455
450
  """
456
451
  sorted_nodes = graph.get_topo_sorted_nodes()
457
- ip_nodes = list(filter(lambda n: interest_points_classifier(n), sorted_nodes))
452
+ ip_nodes = [n for n in sorted_nodes if interest_points_classifier(n)]
458
453
 
459
454
  interest_points_nodes = bound_num_interest_points(ip_nodes, num_ip_factor)
460
455
 
@@ -92,11 +92,8 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
92
92
  if kernel_attr is not None and n.is_weights_quantization_enabled(kernel_attr):
93
93
  weights_conf_nodes_names = [node.name for node in self.graph.get_weights_configurable_nodes(self.fw_info)]
94
94
  if n.name in weights_conf_nodes_names:
95
- return KerasQuantizationWrapper(layer,
96
- weights_quantizers={
97
- kernel_attr: ConfigurableWeightsQuantizer(
98
- **self._get_weights_configurable_quantizer_kwargs(n,
99
- kernel_attr))})
95
+ wq = ConfigurableWeightsQuantizer(**self._get_weights_configurable_quantizer_kwargs(n, kernel_attr))
96
+ return KerasQuantizationWrapper(layer, weights_quantizers={kernel_attr: wq})
100
97
  else:
101
98
  # TODO: Do we want to include other quantized attributes that are not
102
99
  # the kernel attribute in the mixed precision model?
@@ -106,12 +103,12 @@ class MixedPrecisionKerasModelBuilder(KerasModelBuilder):
106
103
  if not len(node_weights_qc) == 1:
107
104
  Logger.critical(f"Expected a unique weights configuration for node {n.name}, but found {len(node_weights_qc)} configurations.")# pragma: no cover
108
105
 
106
+ weights_quant_cfg = node_weights_qc[0].weights_quantization_cfg
107
+ weights_quant_method = weights_quant_cfg.get_attr_config(kernel_attr).weights_quantization_method
109
108
  quantier_for_node = get_inferable_quantizer_class(QuantizationTarget.Weights,
110
- node_weights_qc[0].weights_quantization_cfg
111
- .get_attr_config(kernel_attr)
112
- .weights_quantization_method,
109
+ weights_quant_method,
113
110
  BaseKerasInferableQuantizer)
114
- kwargs = get_inferable_quantizer_kwargs(node_weights_qc[0].weights_quantization_cfg,
111
+ kwargs = get_inferable_quantizer_kwargs(weights_quant_cfg,
115
112
  QuantizationTarget.Weights,
116
113
  kernel_attr)
117
114
 
@@ -438,17 +438,11 @@ class KerasImplementation(FrameworkImplementation):
438
438
  node: Node to indicate whether it needs to be part of the interest points set.
439
439
  Returns: True if the node should be considered an interest point, False otherwise.
440
440
  """
441
-
442
- if node.is_match_type(Activation):
443
- node_type_name = node.framework_attr[keras_constants.ACTIVATION]
444
- if node_type_name in [keras_constants.SOFTMAX, keras_constants.SIGMOID]:
445
- return True
446
- elif any([node.is_match_type(_type) for _type in [tf.nn.softmax, tf.keras.layers.Softmax, tf.nn.sigmoid, Conv2D,
447
- DepthwiseConv2D, Conv2DTranspose, Dense, Concatenate, tf.concat,
448
- Add, tf.add]]):
441
+ if self.is_softmax(node) or self.is_sigmoid(node):
449
442
  return True
450
443
 
451
- return False
444
+ return any([node.is_match_type(_type) for _type in [Conv2D, DepthwiseConv2D, Conv2DTranspose, Dense,
445
+ Concatenate, tf.concat, Add, tf.add]])
452
446
 
453
447
  def get_mp_node_distance_fn(self, n: BaseNode,
454
448
  compute_distance_fn: Callable = None,
@@ -466,32 +460,34 @@ class KerasImplementation(FrameworkImplementation):
466
460
  Returns: A distance function between two tensors and a axis on which the distance is computed (if exists).
467
461
  """
468
462
 
469
- axis = n.framework_attr.get(keras_constants.AXIS) \
470
- if not isinstance(n, FunctionalNode) else n.op_call_kwargs.get(keras_constants.AXIS)
471
-
472
- layer_class = n.layer_class
473
- framework_attrs = n.framework_attr
463
+ axis = n.op_call_kwargs.get(keras_constants.AXIS) if isinstance(n, FunctionalNode) else n.framework_attr.get(keras_constants.AXIS)
474
464
 
475
465
  if compute_distance_fn is not None:
476
466
  return compute_distance_fn, axis
477
467
 
478
- if layer_class == Activation:
479
- node_type_name = framework_attrs[ACTIVATION]
480
- if node_type_name == SOFTMAX and axis is not None:
481
- return compute_kl_divergence, axis
482
- elif node_type_name == SIGMOID:
483
- return compute_cs, axis
484
- elif axis is not None and (layer_class == tf.nn.softmax or layer_class == tf.keras.layers.Softmax
485
- or (layer_class == TFOpLambda and
486
- SOFTMAX in framework_attrs[keras_constants.FUNCTION])):
468
+ # TODO should we really return mse if axis is None? Error? Fill default?
469
+ if self.is_softmax(n) and axis is not None:
487
470
  return compute_kl_divergence, axis
488
- elif layer_class == tf.nn.sigmoid or (layer_class == TFOpLambda and
489
- SIGMOID in framework_attrs[keras_constants.FUNCTION]):
490
- return compute_cs, axis
491
- elif layer_class == Dense:
471
+
472
+ if self.is_sigmoid(n) or n.layer_class == Dense:
492
473
  return compute_cs, axis
474
+
493
475
  return partial(compute_mse, norm=norm_mse), axis
494
476
 
477
+ @staticmethod
478
+ def is_sigmoid(node: BaseNode):
479
+ cls = node.layer_class
480
+ return ((cls == Activation and node.framework_attr[ACTIVATION] == SIGMOID) or
481
+ cls == tf.nn.sigmoid or
482
+ cls == TFOpLambda and SIGMOID in node.framework_attr[keras_constants.FUNCTION])
483
+
484
+ @staticmethod
485
+ def is_softmax(node: BaseNode):
486
+ cls = node.layer_class
487
+ return ((cls == Activation and node.framework_attr[ACTIVATION] == SOFTMAX) or
488
+ cls in [tf.nn.softmax, tf.keras.layers.Softmax] or
489
+ cls == TFOpLambda and SOFTMAX in node.framework_attr[keras_constants.FUNCTION])
490
+
495
491
  def get_hessian_scores_calculator(self,
496
492
  graph: Graph,
497
493
  input_images: List[Any],
@@ -427,10 +427,8 @@ class PytorchImplementation(FrameworkImplementation):
427
427
  Returns: True if the node should be considered an interest point, False otherwise.
428
428
  """
429
429
 
430
- if any([node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
431
- softmax, operator.add, add, cat, operator.concat]]):
432
- return True
433
- return False
430
+ return any(node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
431
+ softmax, operator.add, add, cat, operator.concat])
434
432
 
435
433
  def get_mp_node_distance_fn(self, n: BaseNode,
436
434
  compute_distance_fn: Callable = None,