mct-nightly 2.2.0.20250113.134913__py3-none-any.whl → 2.2.0.20250114.134534__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/RECORD +102 -104
  3. model_compression_toolkit/__init__.py +2 -2
  4. model_compression_toolkit/core/common/framework_info.py +1 -3
  5. model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
  6. model_compression_toolkit/core/common/graph/base_graph.py +20 -21
  7. model_compression_toolkit/core/common/graph/base_node.py +44 -17
  8. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
  9. model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +187 -0
  10. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
  11. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +35 -162
  12. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
  13. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +668 -0
  14. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
  15. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +74 -51
  16. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
  17. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
  18. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
  19. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
  20. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
  21. model_compression_toolkit/core/common/pruning/pruner.py +5 -3
  22. model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
  23. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
  24. model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
  25. model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
  26. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
  27. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
  28. model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
  29. model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
  30. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
  31. model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
  32. model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
  33. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
  34. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
  35. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
  36. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
  37. model_compression_toolkit/core/graph_prep_runner.py +12 -11
  38. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  39. model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
  40. model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
  41. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
  42. model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
  43. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
  44. model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
  45. model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
  46. model_compression_toolkit/core/runner.py +33 -60
  47. model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
  48. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
  49. model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
  50. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
  51. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
  52. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  53. model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
  54. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
  55. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
  56. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  57. model_compression_toolkit/metadata.py +11 -10
  58. model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
  59. model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
  60. model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
  61. model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
  62. model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
  63. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
  64. model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  65. model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
  66. model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
  67. model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
  68. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  69. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
  70. model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
  71. model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
  72. model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
  73. model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
  74. model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
  75. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
  76. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
  77. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
  78. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
  79. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
  80. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
  81. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
  82. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
  83. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
  84. model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
  85. model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
  86. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
  87. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
  88. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
  89. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
  90. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
  91. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
  92. model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
  93. model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
  94. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
  95. model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
  96. model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
  97. model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
  98. model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
  99. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
  100. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
  101. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -528
  102. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
  103. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/LICENSE.md +0 -0
  104. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/WHEEL +0 -0
  105. {mct_nightly-2.2.0.20250113.134913.dist-info → mct_nightly-2.2.0.20250114.134534.dist-info}/top_level.txt +0 -0
  106. /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
@@ -1,528 +0,0 @@
- # Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
- from enum import Enum
- from functools import partial
- from typing import List, Optional
- from copy import deepcopy
-
- import numpy as np
-
- from model_compression_toolkit.core import FrameworkInfo
- from model_compression_toolkit.core.common import Graph, BaseNode
- from model_compression_toolkit.constants import BITS_TO_BYTES, FLOAT_BITWIDTH
- from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
- from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
- from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
-     VirtualSplitWeightsNode, VirtualSplitActivationNode
- from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
- from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, Cut
- from model_compression_toolkit.logger import Logger
-
-
- def weights_size_utilization(mp_cfg: List[int],
-                              graph: Graph,
-                              fw_info: FrameworkInfo,
-                              fw_impl: FrameworkImplementation) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective weights' memory size for the given weight configurable node,
-     according to the given mixed-precision configuration.
-     If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation (not used in this method).
-
-     Returns: A vector of node's weights memory sizes.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     weights_memory = []
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
-
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes resource utilization
-         # TODO: when enabling multiple attribute quantization by default (currently,
-         # only kernel quantization is enabled) we should include other attributes memory in the sum of all
-         # weights memory (when quantized to their default 8-bit, non-configurable).
-         # When implementing this, we should just go over all attributes in the node instead of counting only kernels.
-         for n in graph.nodes:
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             if kernel_attr is None:
-                 continue
-             non_configurable_node = n.name not in weights_mp_nodes \
-                                     and not n.reuse \
-                                     and n.is_all_weights_candidates_equal(kernel_attr)
-
-             if non_configurable_node:
-                 node_nbits = (n.candidates_quantization_cfg[0].weights_quantization_cfg
-                               .get_attr_config(kernel_attr).weights_n_bits)
-                 node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
-                 weights_memory.append(node_weights_memory_in_bytes)
-     else:
-         # Go over configurable all nodes that should be taken into consideration when computing the weights
-         # resource utilization.
-         for n in graph.get_sorted_weights_configurable_nodes(fw_info):
-             # Only nodes with kernel op can be considered configurable
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             node_idx = mp_nodes.index(n.name)
-             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-             node_nbits = node_qc.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits
-
-             node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
-
-             weights_memory.append(node_weights_memory_in_bytes)
-
-     return np.array(weights_memory)
-
-
- def calc_graph_cuts(graph: Graph) -> List[Cut]:
-     """
-     Calculate graph activation cuts.
-     Args:
-         graph: A graph object to calculate activation cuts on.
-
-     Returns:
-         A list of activation cuts.
-
-     """
-     memory_graph = MemoryGraph(deepcopy(graph))
-     _, _, cuts = compute_graph_max_cut(memory_graph)
-
-     if cuts is None:
-         Logger.critical("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
-     # filter empty cuts and cuts that contain only nodes with activation quantization disabled.
-     filtered_cuts = []
-     for cut in cuts:
-         cut_has_no_act_quant_nodes = any(
-             [graph.find_node_by_name(e.node_name)[0].has_activation_quantization_enabled_candidate()
-              for e in cut.mem_elements.elements])
-         if len(cut.mem_elements.elements) > 0 and cut_has_no_act_quant_nodes:
-             filtered_cuts.append(cut)
-     return filtered_cuts
-
-
- def activation_maxcut_size_utilization(mp_cfg: List[int],
-                                        graph: Graph,
-                                        fw_info: FrameworkInfo,
-                                        fw_impl: FrameworkImplementation,
-                                        cuts: Optional[List[Cut]] = None) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective output memory max-cut size for activation
-     nodes, according to the given mixed-precision configuration.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
-             (not used in this method).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
-         cuts: a list of graph cuts (optional. if not provided calculated locally).
-         TODO maxcut: refactor - need to remove the cuts so all metric functions signatures are the same.
-
-     Returns: A vector of node's cut memory sizes.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
-         # configurable nodes.
-         return np.array([])
-
-     activation_cut_memory = []
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
-     nodes_act_nbits = {}
-     for n in graph.get_sorted_activation_configurable_nodes():
-         node_idx = mp_nodes.index(n.name)
-         node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-         node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
-         nodes_act_nbits[n.name] = node_nbits
-
-     if cuts is None:
-         cuts = calc_graph_cuts(graph)
-
-     for i, cut in enumerate(cuts):
-         mem_elements = [m.node_name for m in cut.mem_elements.elements]
-         mem = 0
-         for op_name in mem_elements:
-             n = graph.find_node_by_name(op_name)[0]
-             if n.is_activation_quantization_enabled():
-                 base_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
-                 mem += _compute_node_activation_memory(n, nodes_act_nbits.get(op_name, base_nbits))
-
-         activation_cut_memory.append(mem)
-
-     return np.array(activation_cut_memory)
-
-
- # TODO maxcut: add test for this function and remove no cover
- def activation_output_size_utilization(mp_cfg: List[int],
-                                        graph: Graph,
-                                        fw_info: FrameworkInfo,
-                                        fw_impl: FrameworkImplementation) -> np.ndarray:  # pragma: no cover
-     """
-     Computes a resource utilization vector with the respective output memory size for each activation configurable node,
-     according to the given mixed-precision configuration.
-     If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
-             (not used in this method).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
-
-     Returns: A vector of node's activation memory sizes.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     activation_memory = []
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
-
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes resource utilization
-         for n in graph.nodes:
-             non_configurable_node = n.name not in activation_mp_nodes \
-                                     and n.has_activation_quantization_enabled_candidate() \
-                                     and n.is_all_activation_candidates_equal()
-
-             if non_configurable_node:
-                 node_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
-                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
-                 activation_memory.append(node_activation_memory_in_bytes)
-     else:
-         # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
-         for n in graph.get_sorted_activation_configurable_nodes():
-             node_idx = mp_nodes.index(n.name)
-             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-             node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
-
-             node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
-
-             activation_memory.append(node_activation_memory_in_bytes)
-
-     return np.array(activation_memory)
-
-
- def total_weights_activation_utilization(mp_cfg: List[int],
-                                          graph: Graph,
-                                          fw_info: FrameworkInfo,
-                                          fw_impl: FrameworkImplementation) -> np.ndarray:
-     """
-     Computes resource utilization tensor with the respective weights size and output memory size for each activation configurable node,
-     according to the given mixed-precision configuration.
-     If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
-             (not used in this method).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
-
-     Returns: A 2D tensor of nodes' weights memory sizes and activation output memory size.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     weights_activation_memory = []
-     weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
-     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
-
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes utilization
-         for n in graph.nodes:
-
-             non_configurable = False
-             node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0
-
-             # Non-configurable Weights
-             # TODO: currently considering only kernel attributes in weights memory utilization.
-             # When enabling multi-attribute quantization we need to modify this method to count all attributes.
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             if kernel_attr is not None:
-                 is_non_configurable_weights = n.name not in weights_mp_nodes and \
-                                               n.is_all_weights_candidates_equal(kernel_attr) and \
-                                               not n.reuse
-
-                 if is_non_configurable_weights:
-                     node_nbits = (n.candidates_quantization_cfg[0].weights_quantization_cfg
-                                   .get_attr_config(kernel_attr).weights_n_bits)
-                     node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
-                     non_configurable = True
-
-             # Non-configurable Activation
-             is_non_configurable_activation = n.name not in activation_mp_nodes and \
-                                              n.has_activation_quantization_enabled_candidate() and \
-                                              n.is_all_activation_candidates_equal()
-
-             if is_non_configurable_activation:
-                 node_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
-                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
-                 non_configurable = True
-
-             if non_configurable:
-                 weights_activation_memory.append(
-                     np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
-     else:
-         # Go over all nodes that should be taken into consideration when computing the weights or
-         # activation memory utilization (all configurable nodes).
-         for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)):
-             # TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute
-             # quantization we need to modify this method to count all attributes.
-
-             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-
-             # Compute node's weights memory (if no weights to quantize then set to 0)
-             node_weights_memory_in_bytes = 0
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             if kernel_attr is not None:
-                 if n.is_weights_quantization_enabled(kernel_attr) and not n.is_all_weights_candidates_equal(kernel_attr):
-                     node_weights_nbits = node_qc.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits
-                     node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_weights_nbits, fw_info)
-
-             # Compute node's activation memory (if node's activation are not being quantized then set to 0)
-             node_activation_nbits = node_qc.activation_quantization_cfg.activation_n_bits
-             node_activation_memory_in_bytes = 0
-             if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal():
-                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_activation_nbits)
-
-             weights_activation_memory.append(np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
-
-     return np.array(weights_activation_memory)
-
-
- def bops_utilization(mp_cfg: List[int],
-                      graph: Graph,
-                      fw_info: FrameworkInfo,
-                      fw_impl: FrameworkImplementation,
-                      set_constraints: bool = True) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
-     according to the given mixed-precision configuration of a virtual graph with composed nodes.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation.
-         set_constraints: A flag for utilizing the method for resource utilization computation of a
-             given config not for LP formalization purposes.
-
-     Returns: A vector of node's BOPS count.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-
-     if not set_constraints:
-         return _bops_utilization(mp_cfg,
-                                  graph,
-                                  fw_info,
-                                  fw_impl)
-
-     # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
-     # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
-
-     virtual_bops_nodes = [n for n in graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
-
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     bops = [n.get_bops_count(fw_impl, fw_info, candidate_idx=_get_node_cfg_idx(n, mp_cfg, mp_nodes)) for n in virtual_bops_nodes]
-
-     return np.array(bops)
-
-
- def _bops_utilization(mp_cfg: List[int],
-                       graph: Graph,
-                       fw_info: FrameworkInfo,
-                       fw_impl: FrameworkImplementation) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
-     according to the given mixed-precision configuration of an original graph.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation.
-
-     Returns: A vector of node's BOPS count.
-
-     """
-
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-
-     # Go over all nodes that should be taken into consideration when computing the BOPS utilization.
-     bops = []
-     for n in graph.get_topo_sorted_nodes():
-         if n.has_kernel_weight_to_quantize(fw_info) and not n.has_positional_weights:
-             # If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
-             incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
-             if len(incoming_edges) != 1:
-                 Logger.critical(f"Unable to compute BOPS metric for node {n.name} due to multiple inputs.")  # pragma: no cover
-             input_activation_node = incoming_edges[0].source_node
-             if len(graph.out_edges(input_activation_node)) > 1:
-                 # In the case where the activation node has multiple outgoing edges
-                 # we don't consider this edge in the BOPS utilization calculation
-                 continue
-
-             input_activation_node_cfg = input_activation_node.candidates_quantization_cfg[_get_node_cfg_idx(input_activation_node, mp_cfg, mp_nodes)]
-
-             node_mac = fw_impl.get_node_mac_operations(n, fw_info)
-
-             node_qc = n.candidates_quantization_cfg[_get_node_cfg_idx(n, mp_cfg, mp_nodes)]
-             kenrel_node_qc = node_qc.weights_quantization_cfg.get_attr_config(fw_info.get_kernel_op_attributes(n.type)[0])
-             node_weights_nbits = kenrel_node_qc.weights_n_bits if \
-                 kenrel_node_qc.enable_weights_quantization else FLOAT_BITWIDTH
-             input_activation_nbits = input_activation_node_cfg.activation_quantization_cfg.activation_n_bits if \
-                 input_activation_node_cfg.activation_quantization_cfg.enable_activation_quantization else FLOAT_BITWIDTH
-
-             node_bops = node_weights_nbits * input_activation_nbits * node_mac
-             bops.append(node_bops)
-
-     return np.array(bops)
-
-
- def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nodes_names: List[str]) -> int:
-     """
-     Returns the index of a node's quantization configuration candidate according to the given
-     mixed-precision configuration. If the node is not configurable, then it must have a single configuration,
-     therefore, the index 0 is returned.
-
-     Args:
-         node: A node to get its candidate configuration index.
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         sorted_configurable_nodes_names: A list of configurable nodes names.
-
-     Returns: An index (integer) of a node's quantization configuration candidate.
-     """
-
-     if node.name in sorted_configurable_nodes_names:
-         node_idx = sorted_configurable_nodes_names.index(node.name)
-         return mp_cfg[node_idx]
-     else:
-         assert len(node.candidates_quantization_cfg) > 0, \
-             "Any node should have at least one candidate configuration."
-         return 0
-
-
- def _get_origin_weights_node(n: BaseNode) -> BaseNode:
-     """
-     In case we run a resource utilization computation on a virtual graph,
-     this method is used to retrieve the original node out of a virtual weights node,
-
-     Args:
-         n: A possibly virtual node.
-
-     Returns: A node from the original (non-virtual) graph which the given node represents.
-
-     """
-
-     if isinstance(n, VirtualActivationWeightsNode):
-         return n.original_weights_node
-     if isinstance(n, VirtualSplitWeightsNode):
-         return n.origin_node
-
-     return n
-
-
- def _get_origin_activation_node(n: BaseNode) -> BaseNode:
-     """
-     In case we run a resource utilization computation on a virtual graph,
-     this method is used to retrieve the original node out of a virtual activation node,
-
-     Args:
-         n: A possibly virtual node.
-
-     Returns: A node from the original (non-virtual) graph which the given node represents.
-
-     """
-
-     if isinstance(n, VirtualActivationWeightsNode):
-         return n.original_activation_node
-     if isinstance(n, VirtualSplitActivationNode):
-         return n.origin_node
-
-     return n
-
-
- def _compute_node_weights_memory(n: BaseNode, node_nbits: int, fw_info: FrameworkInfo) -> float:
-     """
-     Computes the weights' memory of the given node.
-
-     Args:
-         n: A node to compute its weights' memory.
-         node_nbits: A bit-width in which the node's weights should be quantized.
-         fw_info: FrameworkInfo object about the specific framework.
-
-     Returns: The total memory of the node's weights when quantized to the given bit-width.
-
-     """
-
-     origin_node = _get_origin_weights_node(n)
-
-     node_num_weights_params = 0
-     for attr in fw_info.get_kernel_op_attributes(origin_node.type):
-         if attr is not None:
-             node_num_weights_params += origin_node.get_weights_by_keys(attr).flatten().shape[0]
-
-     return node_num_weights_params * node_nbits / BITS_TO_BYTES
-
-
- def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float:
-     """
-     Computes the activation tensor memory of the given node.
-
-     Args:
-         n: A node to compute its activation tensor memory.
-         node_nbits: A bit-width in which the node's weights should be quantized.
-
-     Returns: The total memory of the node's activation tensor when quantized to the given bit-width.
-
-     """
-
-     origin_node = _get_origin_activation_node(n)
-     node_output_size = origin_node.get_total_output_params()
-
-     return node_output_size * node_nbits / BITS_TO_BYTES
-
-
- class MpRuMetric(Enum):
-     """
-     Defines resource utilization computation functions that can be used to compute bops_utilization for a given target
-     for a given mp config. The enum values can be used to call a function on a set of arguments.
-
-     WEIGHTS_SIZE - applies the weights_size_utilization function
-
-     ACTIVATION_MAXCUT_SIZE - applies the activation_maxcut_size_utilization function.
-
-     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
-
-     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
-
-     BOPS_COUNT - applies the bops_utilization function
-
-     """
-
-     WEIGHTS_SIZE = partial(weights_size_utilization)
-     ACTIVATION_MAXCUT_SIZE = partial(activation_maxcut_size_utilization)
-     ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
-     TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
-     BOPS_COUNT = partial(bops_utilization)
-
-     def __call__(self, *args):
-         return self.value(*args)
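
For orientation, the removed module boils down to three formulas: weights memory = parameter count x weights bit-width / 8 bytes (_compute_node_weights_memory), activation memory = output element count x activation bit-width / 8 bytes (_compute_node_activation_memory), and BOPS = weights bit-width x input-activation bit-width x MAC count (_bops_utilization). The standalone sketch below reproduces that arithmetic with made-up layer figures; it assumes BITS_TO_BYTES is 8 and is not part of the MCT API (the replacement logic in this release lives in the new resource_utilization_calculator.py).

# Minimal sketch of the formulas used by the removed ru_methods.py (illustrative values, not MCT code).

def weights_memory_bytes(num_params: int, weights_n_bits: int) -> float:
    # Parameter count times bit-width, converted to bytes (assumes BITS_TO_BYTES == 8).
    return num_params * weights_n_bits / 8

def activation_memory_bytes(num_output_elements: int, activation_n_bits: int) -> float:
    # Output tensor element count times bit-width, converted to bytes.
    return num_output_elements * activation_n_bits / 8

def node_bops(weights_n_bits: int, input_activation_n_bits: int, mac_count: int) -> int:
    # Bit-operations per node: weight bits times input-activation bits times MACs.
    return weights_n_bits * input_activation_n_bits * mac_count

if __name__ == "__main__":
    # Hypothetical conv layer: 4,608 kernel parameters, 401,408 output elements, ~1.8M MACs.
    print(weights_memory_bytes(4_608, 4))       # 2304.0 bytes at 4-bit weights
    print(activation_memory_bytes(401_408, 8))  # 401408.0 bytes at 8-bit activations
    print(node_bops(4, 8, 1_806_336))           # 57802752 BOPS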
model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py
@@ -1,23 +0,0 @@
- # Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # ==============================================================================
-
- from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework.attribute_filter import AttributeFilter
- from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities, OperationsSetToLayers, Smaller, SmallerEq, NotEq, Eq, GreaterEq, Greater, LayerFilterParams, OperationsToLayers, get_current_tpc
- from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformModel, OperatorsSet, \
-     OperatorSetConcat, Signedness, AttributeQuantizationConfig, OpQuantizationConfig, QuantizationConfigOptions, Fusing
-
- from mct_quantizers import QuantizationMethod
-
-
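
With target_platform/__init__.py removed, imports rooted at model_compression_toolkit.target_platform_capabilities.target_platform no longer ship in this nightly. Judging from the renames in the file list above (target_platform/targetplatform2framework moved to targetplatform2framework), the sub-package now sits one level higher; the snippet below is an inferred migration sketch, not a documented path, so verify the new exports before relying on it.

# Old import that shipped in the removed __init__.py (now raises ImportError):
#   from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import LayerFilterParams
# Apparent new location, inferred from the directory rename above (unverified assumption):
try:
    from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import LayerFilterParams
except ImportError:
    # Fall back, or pin mct-nightly 2.2.0.20250113.134913 until the migration is confirmed.
    LayerFilterParams = None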