mct-nightly 2.2.0.20250113.527__py3-none-any.whl → 2.2.0.20250114.84821__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/RECORD +103 -105
  3. model_compression_toolkit/__init__.py +2 -2
  4. model_compression_toolkit/core/common/framework_info.py +1 -3
  5. model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
  6. model_compression_toolkit/core/common/graph/base_graph.py +20 -21
  7. model_compression_toolkit/core/common/graph/base_node.py +44 -17
  8. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
  9. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
  10. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +26 -135
  11. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
  12. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +667 -0
  13. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
  14. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +164 -470
  15. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +30 -7
  16. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
  17. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
  18. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
  19. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
  20. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
  21. model_compression_toolkit/core/common/pruning/pruner.py +5 -3
  22. model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
  23. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
  24. model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
  25. model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
  26. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
  27. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
  28. model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
  29. model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
  30. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
  31. model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
  32. model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
  33. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
  34. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
  35. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
  36. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
  37. model_compression_toolkit/core/graph_prep_runner.py +12 -11
  38. model_compression_toolkit/core/keras/data_util.py +24 -5
  39. model_compression_toolkit/core/keras/default_framework_info.py +1 -1
  40. model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
  41. model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
  42. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
  43. model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
  44. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
  45. model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
  46. model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
  47. model_compression_toolkit/core/runner.py +33 -60
  48. model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
  49. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
  50. model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
  51. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
  52. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
  53. model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  54. model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
  55. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
  56. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
  57. model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  58. model_compression_toolkit/metadata.py +11 -10
  59. model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
  60. model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
  61. model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
  62. model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
  63. model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
  64. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
  65. model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
  66. model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
  67. model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
  68. model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
  69. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
  70. model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
  71. model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
  72. model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
  73. model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
  74. model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
  75. model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
  76. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
  77. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
  78. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
  79. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
  80. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
  81. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
  82. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
  83. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
  84. model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
  85. model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
  86. model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
  87. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
  88. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
  89. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
  90. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
  91. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
  92. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
  93. model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
  94. model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
  95. model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
  96. model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
  97. model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
  98. model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
  99. model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
  100. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
  101. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
  102. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
  103. {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/LICENSE.md +0 -0
  104. {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/WHEEL +0 -0
  105. {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/top_level.txt +0 -0
  106. /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py
@@ -12,389 +12,191 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from enum import Enum
- from functools import partial
- from typing import List, Optional
- from copy import deepcopy
+ from typing import List, Set, Dict, Optional, Tuple

  import numpy as np

  from model_compression_toolkit.core import FrameworkInfo
  from model_compression_toolkit.core.common import Graph, BaseNode
- from model_compression_toolkit.constants import BITS_TO_BYTES, FLOAT_BITWIDTH
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
- from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
- from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
-     VirtualSplitWeightsNode, VirtualSplitActivationNode
- from model_compression_toolkit.core.common.graph.memory_graph.memory_graph import MemoryGraph
- from model_compression_toolkit.core.common.graph.memory_graph.compute_graph_max_cut import compute_graph_max_cut, Cut
- from model_compression_toolkit.logger import Logger
-
-
- def weights_size_utilization(mp_cfg: List[int],
-                              graph: Graph,
-                              fw_info: FrameworkInfo,
-                              fw_impl: FrameworkImplementation) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective weights' memory size for the given weight configurable node,
-     according to the given mixed-precision configuration.
-     If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation (not used in this method).
-
-     Returns: A vector of node's weights memory sizes.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     weights_memory = []
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
-
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes resource utilization
-         # TODO: when enabling multiple attribute quantization by default (currently,
-         # only kernel quantization is enabled) we should include other attributes memory in the sum of all
-         # weights memory (when quantized to their default 8-bit, non-configurable).
-         # When implementing this, we should just go over all attributes in the node instead of counting only kernels.
-         for n in graph.nodes:
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             if kernel_attr is None:
-                 continue
-             non_configurable_node = n.name not in weights_mp_nodes \
-                                     and not n.reuse \
-                                     and n.is_all_weights_candidates_equal(kernel_attr)
-
-             if non_configurable_node:
-                 node_nbits = (n.candidates_quantization_cfg[0].weights_quantization_cfg
-                               .get_attr_config(kernel_attr).weights_n_bits)
-                 node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
-                 weights_memory.append(node_weights_memory_in_bytes)
-     else:
-         # Go over configurable all nodes that should be taken into consideration when computing the weights
-         # resource utilization.
-         for n in graph.get_sorted_weights_configurable_nodes(fw_info):
-             # Only nodes with kernel op can be considered configurable
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             node_idx = mp_nodes.index(n.name)
-             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-             node_nbits = node_qc.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits
-
-             node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
-
-             weights_memory.append(node_weights_memory_in_bytes)
-
-     return np.array(weights_memory)
-
-
- def calc_graph_cuts(graph: Graph) -> List[Cut]:
-     """
-     Calculate graph activation cuts.
-     Args:
-         graph: A graph object to calculate activation cuts on.
-
-     Returns:
-         A list of activation cuts.
-
-     """
-     memory_graph = MemoryGraph(deepcopy(graph))
-     _, _, cuts = compute_graph_max_cut(memory_graph)
-
-     if cuts is None:
-         Logger.critical("Failed to calculate activation memory cuts for graph.")  # pragma: no cover
-     # filter empty cuts and cuts that contain only nodes with activation quantization disabled.
-     filtered_cuts = []
-     for cut in cuts:
-         cut_has_no_act_quant_nodes = any(
-             [graph.find_node_by_name(e.node_name)[0].has_activation_quantization_enabled_candidate()
-              for e in cut.mem_elements.elements])
-         if len(cut.mem_elements.elements) > 0 and cut_has_no_act_quant_nodes:
-             filtered_cuts.append(cut)
-     return filtered_cuts
-
-
- def activation_maxcut_size_utilization(mp_cfg: List[int],
-                                        graph: Graph,
-                                        fw_info: FrameworkInfo,
-                                        fw_impl: FrameworkImplementation,
-                                        cuts: Optional[List[Cut]] = None) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective output memory max-cut size for activation
-     nodes, according to the given mixed-precision configuration.
+ from model_compression_toolkit.core.common.graph.memory_graph.cut import Cut
+ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
+     RUTarget
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
+     ResourceUtilizationCalculator, BitwidthMode, TargetInclusionCriterion
+ from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
+     NodeActivationQuantizationConfig
+
+
+ # TODO take into account Virtual nodes. Are candidates defined with respect to virtual or original nodes?
+ # Can we use the virtual graph only for bops and the original graph for everything else?
+
+ class MixedPrecisionRUHelper:
+     """ Helper class for resource utilization computations for mixed precision optimization. """
+
+     def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation):
+         self.graph = graph
+         self.fw_info = fw_info
+         self.fw_impl = fw_impl
+         self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
+
+     def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
+         """
+         Compute utilization of requested targets for a specific configuration in the format expected by LP problem
+         formulation, namely an array of ru values corresponding to graph's configurable nodes in the topological order.
+         For activation target, the array contains values for activation cuts in unspecified order (as long as it is
+         consistent between configurations).
+
+         Args:
+             ru_targets: resource utilization targets to compute.
+             mp_cfg: a list of candidates indices for configurable layers.
+
+         Returns:
+             Dict of the computed utilization per target.
+         """
+
+         ru = {}
+
+         act_qcs, w_qcs = self.get_configurable_qcs(mp_cfg) if mp_cfg else (None, None)
+         w_util = None
+         if RUTarget.WEIGHTS in ru_targets:
+             w_util = self._weights_utilization(w_qcs)
+             ru[RUTarget.WEIGHTS] = np.array(list(w_util.values()))
+
+         # TODO make mp agnostic to activation method
+         if RUTarget.ACTIVATION in ru_targets:
+             act_util = self._activation_maxcut_utilization(act_qcs)
+             ru[RUTarget.ACTIVATION] = np.array(list(act_util.values()))
+
+         # TODO use maxcut
+         if RUTarget.TOTAL in ru_targets:
+             act_tensors_util = self._activation_tensor_utilization(act_qcs)
+             w_util = w_util or self._weights_utilization(w_qcs)
+             total = {n: (w_util.get(n, 0), act_tensors_util.get(n, 0))
+                      # for n in self.graph.nodes if n in act_tensors_util or n in w_util}
+                      for n in self.graph.get_topo_sorted_nodes() if n in act_tensors_util or n in w_util}
+             ru[RUTarget.TOTAL] = np.array(list(total.values()))
+
+         if RUTarget.BOPS in ru_targets:
+             ru[RUTarget.BOPS] = self._bops_utilization(mp_cfg)
+
+         return ru
+
+     def get_configurable_qcs(self, mp_cfg) \
+             -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
+         """
+         Retrieve quantization candidates objects for weights and activations from the configuration list.
+
+         Args:
+             mp_cfg: a list of candidates indices for configurable layers.
+
+         Returns:
+             Mapping between nodes to weights quantization config, and a mapping between nodes and activation
+             quantization config.
+         """
+         mp_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
+         node_qcs = {n: n.candidates_quantization_cfg[mp_cfg[i]] for i, n in enumerate(mp_nodes)}
+         act_qcs = {n: node_qcs[n].activation_quantization_cfg
+                    for n in self.graph.get_activation_configurable_nodes()}
+         w_qcs = {n: node_qcs[n].weights_quantization_cfg
+                  for n in self.graph.get_weights_configurable_nodes(self.fw_info)}
+         return act_qcs, w_qcs
+
+     def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
+         """
+         Compute weights utilization for configurable weights if configuration is passed,
+         or for non-configurable nodes otherwise.
+
+         Args:
+             w_qcs: nodes quantization configuration to compute, or None.
+
+         Returns:
+             Weight utilization per node.
+         """
+         if w_qcs:
+             target_criterion = TargetInclusionCriterion.QConfigurable
+             bitwidth_mode = BitwidthMode.QCustom
+         else:
+             target_criterion = TargetInclusionCriterion.QNonConfigurable
+             bitwidth_mode = BitwidthMode.QDefaultSP
+
+         _, nodes_util, _ = self.ru_calculator.compute_weights_utilization(target_criterion=target_criterion,
+                                                                           bitwidth_mode=bitwidth_mode,
+                                                                           w_qcs=w_qcs)
+         nodes_util = {n: u.bytes for n, u in nodes_util.items()}
+         return nodes_util
+
+     def _activation_maxcut_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
+             -> Optional[Dict[Cut, float]]:
+         """
+         Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
+
+         Args:
+             act_qcs: nodes activation configuration or None.
+
+         Returns:
+             Activation utilization per cut, or empty dict if no configuration was passed.
+         """
+         if act_qcs:
+             _, cuts_util, _ = self.ru_calculator.compute_cut_activation_utilization(TargetInclusionCriterion.AnyQuantized,
+                                                                                     bitwidth_mode=BitwidthMode.QCustom,
+                                                                                     act_qcs=act_qcs)
+             cuts_util = {c: u.bytes for c, u in cuts_util.items()}
+             return cuts_util

-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
-             (not used in this method).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
-         cuts: a list of graph cuts (optional. if not provided calculated locally).
-             TODO maxcut: refactor - need to remove the cuts so all metric functions signatures are the same.
-
-     Returns: A vector of node's cut memory sizes.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     if len(mp_cfg) == 0:
          # Computing non-configurable nodes resource utilization for max-cut is included in the calculation of the
          # configurable nodes.
-         return np.array([])
-
-     activation_cut_memory = []
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
-     nodes_act_nbits = {}
-     for n in graph.get_sorted_activation_configurable_nodes():
-         node_idx = mp_nodes.index(n.name)
-         node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-         node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
-         nodes_act_nbits[n.name] = node_nbits
-
-     if cuts is None:
-         cuts = calc_graph_cuts(graph)
-
-     for i, cut in enumerate(cuts):
-         mem_elements = [m.node_name for m in cut.mem_elements.elements]
-         mem = 0
-         for op_name in mem_elements:
-             n = graph.find_node_by_name(op_name)[0]
-             if n.is_activation_quantization_enabled():
-                 base_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
-                 mem += _compute_node_activation_memory(n, nodes_act_nbits.get(op_name, base_nbits))
-
-         activation_cut_memory.append(mem)
-
-     return np.array(activation_cut_memory)
-
-
- # TODO maxcut: add test for this function and remove no cover
- def activation_output_size_utilization(mp_cfg: List[int],
-                                        graph: Graph,
-                                        fw_info: FrameworkInfo,
-                                        fw_impl: FrameworkImplementation) -> np.ndarray:  # pragma: no cover
-     """
-     Computes a resource utilization vector with the respective output memory size for each activation configurable node,
-     according to the given mixed-precision configuration.
-     If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
-
-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
-             (not used in this method).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
-
-     Returns: A vector of node's activation memory sizes.
-     Note that the vector is not necessarily of the same length as the given config.
-
-     """
-     activation_memory = []
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
-
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes resource utilization
-         for n in graph.nodes:
-             non_configurable_node = n.name not in activation_mp_nodes \
-                                     and n.has_activation_quantization_enabled_candidate() \
-                                     and n.is_all_activation_candidates_equal()
-
-             if non_configurable_node:
-                 node_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
-                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
-                 activation_memory.append(node_activation_memory_in_bytes)
-     else:
-         # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
-         for n in graph.get_sorted_activation_configurable_nodes():
-             node_idx = mp_nodes.index(n.name)
-             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-             node_nbits = node_qc.activation_quantization_cfg.activation_n_bits
-
-             node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
-
-             activation_memory.append(node_activation_memory_in_bytes)
-
-     return np.array(activation_memory)
-
-
- def total_weights_activation_utilization(mp_cfg: List[int],
-                                          graph: Graph,
-                                          fw_info: FrameworkInfo,
-                                          fw_impl: FrameworkImplementation) -> np.ndarray:
-     """
-     Computes resource utilization tensor with the respective weights size and output memory size for each activation configurable node,
-     according to the given mixed-precision configuration.
-     If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
+         return {}

-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize)
-             (not used in this method).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation(not used in this method).
+     def _activation_tensor_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]):
+         """
+         Compute activation tensors utilization fo configurable nodes if configuration is passed or
+         for non-configurable nodes otherwise.

-     Returns: A 2D tensor of nodes' weights memory sizes and activation output memory size.
-     Note that the vector is not necessarily of the same length as the given config.
+         Args:
+             act_qcs: activation quantization configuration or None.

-     """
-     weights_activation_memory = []
-     weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
-     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
-
-     if len(mp_cfg) == 0:
-         # Computing non-configurable nodes utilization
-         for n in graph.nodes:
-
-             non_configurable = False
-             node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0
-
-             # Non-configurable Weights
-             # TODO: currently considering only kernel attributes in weights memory utilization.
-             # When enabling multi-attribute quantization we need to modify this method to count all attributes.
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             if kernel_attr is not None:
-                 is_non_configurable_weights = n.name not in weights_mp_nodes and \
-                                               n.is_all_weights_candidates_equal(kernel_attr) and \
-                                               not n.reuse
-
-                 if is_non_configurable_weights:
-                     node_nbits = (n.candidates_quantization_cfg[0].weights_quantization_cfg
-                                   .get_attr_config(kernel_attr).weights_n_bits)
-                     node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
-                     non_configurable = True
-
-             # Non-configurable Activation
-             is_non_configurable_activation = n.name not in activation_mp_nodes and \
-                                              n.has_activation_quantization_enabled_candidate() and \
-                                              n.is_all_activation_candidates_equal()
-
-             if is_non_configurable_activation:
-                 node_nbits = n.candidates_quantization_cfg[0].activation_quantization_cfg.activation_n_bits
-                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
-                 non_configurable = True
-
-             if non_configurable:
-                 weights_activation_memory.append(
-                     np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
-     else:
-         # Go over all nodes that should be taken into consideration when computing the weights or
-         # activation memory utilization (all configurable nodes).
-         for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)):
-             # TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute
-             # quantization we need to modify this method to count all attributes.
-
-             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
-
-             # Compute node's weights memory (if no weights to quantize then set to 0)
-             node_weights_memory_in_bytes = 0
-             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
-             if kernel_attr is not None:
-                 if n.is_weights_quantization_enabled(kernel_attr) and not n.is_all_weights_candidates_equal(kernel_attr):
-                     node_weights_nbits = node_qc.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits
-                     node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_weights_nbits, fw_info)
-
-             # Compute node's activation memory (if node's activation are not being quantized then set to 0)
-             node_activation_nbits = node_qc.activation_quantization_cfg.activation_n_bits
-             node_activation_memory_in_bytes = 0
-             if n.is_activation_quantization_enabled() and not n.is_all_activation_candidates_equal():
-                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_activation_nbits)
-
-             weights_activation_memory.append(np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
-
-     return np.array(weights_activation_memory)
-
-
- def bops_utilization(mp_cfg: List[int],
-                      graph: Graph,
-                      fw_info: FrameworkInfo,
-                      fw_impl: FrameworkImplementation,
-                      set_constraints: bool = True) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
-     according to the given mixed-precision configuration of a virtual graph with composed nodes.
+         Returns:
+             Activation utilization per node.
+         """
+         if act_qcs:
+             target_criterion = TargetInclusionCriterion.QConfigurable
+             bitwidth_mode = BitwidthMode.QCustom
+         else:
+             target_criterion = TargetInclusionCriterion.QNonConfigurable
+             bitwidth_mode = BitwidthMode.QDefaultSP

-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation.
-         set_constraints: A flag for utilizing the method for resource utilization computation of a
-             given config not for LP formalization purposes.
+         _, nodes_util = self.ru_calculator.compute_activation_tensors_utilization(target_criterion=target_criterion,
+                                                                                   bitwidth_mode=bitwidth_mode,
+                                                                                   act_qcs=act_qcs)
+         return {n: u.bytes for n, u in nodes_util.items()}

-     Returns: A vector of node's BOPS count.
-     Note that the vector is not necessarily of the same length as the given config.
+     def _bops_utilization(self, mp_cfg: List[int]):
+         """
+         Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
+         according to the given mixed-precision configuration of a virtual graph with composed nodes.

-     """
-
-     if not set_constraints:
-         return _bops_utilization(mp_cfg,
-                                  graph,
-                                  fw_info,
-                                  fw_impl)
+         Args:
+             mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)

-     # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
-     # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
+         Returns: A vector of node's BOPS count.
+         Note that the vector is not necessarily of the same length as the given config.

-     virtual_bops_nodes = [n for n in graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
+         """
+         # TODO keeping old implementation for now

-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-     bops = [n.get_bops_count(fw_impl, fw_info, candidate_idx=_get_node_cfg_idx(n, mp_cfg, mp_nodes)) for n in virtual_bops_nodes]
+         # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
+         # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).

-     return np.array(bops)
+         virtual_bops_nodes = [n for n in self.graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]

+         mp_nodes = self.graph.get_configurable_sorted_nodes_names(self.fw_info)

- def _bops_utilization(mp_cfg: List[int],
-                       graph: Graph,
-                       fw_info: FrameworkInfo,
-                       fw_impl: FrameworkImplementation) -> np.ndarray:
-     """
-     Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
-     according to the given mixed-precision configuration of an original graph.
+         bops = [n.get_bops_count(self.fw_impl, self.fw_info, candidate_idx=_get_node_cfg_idx(n, mp_cfg, mp_nodes))
+                 for n in virtual_bops_nodes]

-     Args:
-         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
-         graph: Graph object.
-         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
-         fw_impl: FrameworkImplementation object with specific framework methods implementation.
-
-     Returns: A vector of node's BOPS count.
-
-     """
-
-     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
-
-     # Go over all nodes that should be taken into consideration when computing the BOPS utilization.
-     bops = []
-     for n in graph.get_topo_sorted_nodes():
-         if n.has_kernel_weight_to_quantize(fw_info) and not n.has_positional_weights:
-             # If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
-             incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
-             if len(incoming_edges) != 1:
-                 Logger.critical(f"Unable to compute BOPS metric for node {n.name} due to multiple inputs.")  # pragma: no cover
-             input_activation_node = incoming_edges[0].source_node
-             if len(graph.out_edges(input_activation_node)) > 1:
-                 # In the case where the activation node has multiple outgoing edges
-                 # we don't consider this edge in the BOPS utilization calculation
-                 continue
-
-             input_activation_node_cfg = input_activation_node.candidates_quantization_cfg[_get_node_cfg_idx(input_activation_node, mp_cfg, mp_nodes)]
-
-             node_mac = fw_impl.get_node_mac_operations(n, fw_info)
-
-             node_qc = n.candidates_quantization_cfg[_get_node_cfg_idx(n, mp_cfg, mp_nodes)]
-             kenrel_node_qc = node_qc.weights_quantization_cfg.get_attr_config(fw_info.get_kernel_op_attributes(n.type)[0])
-             node_weights_nbits = kenrel_node_qc.weights_n_bits if \
-                 kenrel_node_qc.enable_weights_quantization else FLOAT_BITWIDTH
-             input_activation_nbits = input_activation_node_cfg.activation_quantization_cfg.activation_n_bits if \
-                 input_activation_node_cfg.activation_quantization_cfg.enable_activation_quantization else FLOAT_BITWIDTH
-
-             node_bops = node_weights_nbits * input_activation_nbits * node_mac
-             bops.append(node_bops)
-
-     return np.array(bops)
+         return np.array(bops)


  def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nodes_names: List[str]) -> int:
@@ -414,115 +216,7 @@ def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nod
      if node.name in sorted_configurable_nodes_names:
          node_idx = sorted_configurable_nodes_names.index(node.name)
          return mp_cfg[node_idx]
-     else:
+     else:  # pragma: no cover
          assert len(node.candidates_quantization_cfg) > 0, \
              "Any node should have at least one candidate configuration."
          return 0
-
-
- def _get_origin_weights_node(n: BaseNode) -> BaseNode:
-     """
-     In case we run a resource utilization computation on a virtual graph,
-     this method is used to retrieve the original node out of a virtual weights node,
-
-     Args:
-         n: A possibly virtual node.
-
-     Returns: A node from the original (non-virtual) graph which the given node represents.
-
-     """
-
-     if isinstance(n, VirtualActivationWeightsNode):
-         return n.original_weights_node
-     if isinstance(n, VirtualSplitWeightsNode):
-         return n.origin_node
-
-     return n
-
-
- def _get_origin_activation_node(n: BaseNode) -> BaseNode:
-     """
-     In case we run a resource utilization computation on a virtual graph,
-     this method is used to retrieve the original node out of a virtual activation node,
-
-     Args:
-         n: A possibly virtual node.
-
-     Returns: A node from the original (non-virtual) graph which the given node represents.
-
-     """
-
-     if isinstance(n, VirtualActivationWeightsNode):
-         return n.original_activation_node
-     if isinstance(n, VirtualSplitActivationNode):
-         return n.origin_node
-
-     return n
-
-
- def _compute_node_weights_memory(n: BaseNode, node_nbits: int, fw_info: FrameworkInfo) -> float:
-     """
-     Computes the weights' memory of the given node.
-
-     Args:
-         n: A node to compute its weights' memory.
-         node_nbits: A bit-width in which the node's weights should be quantized.
-         fw_info: FrameworkInfo object about the specific framework.
-
-     Returns: The total memory of the node's weights when quantized to the given bit-width.
-
-     """
-
-     origin_node = _get_origin_weights_node(n)
-
-     node_num_weights_params = 0
-     for attr in fw_info.get_kernel_op_attributes(origin_node.type):
-         if attr is not None:
-             node_num_weights_params += origin_node.get_weights_by_keys(attr).flatten().shape[0]
-
-     return node_num_weights_params * node_nbits / BITS_TO_BYTES
-
-
- def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float:
-     """
-     Computes the activation tensor memory of the given node.
-
-     Args:
-         n: A node to compute its activation tensor memory.
-         node_nbits: A bit-width in which the node's weights should be quantized.
-
-     Returns: The total memory of the node's activation tensor when quantized to the given bit-width.
-
-     """
-
-     origin_node = _get_origin_activation_node(n)
-     node_output_size = origin_node.get_total_output_params()
-
-     return node_output_size * node_nbits / BITS_TO_BYTES
-
-
- class MpRuMetric(Enum):
-     """
-     Defines resource utilization computation functions that can be used to compute bops_utilization for a given target
-     for a given mp config. The enum values can be used to call a function on a set of arguments.
-
-     WEIGHTS_SIZE - applies the weights_size_utilization function
-
-     ACTIVATION_MAXCUT_SIZE - applies the activation_maxcut_size_utilization function.
-
-     ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
-
-     TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
-
-     BOPS_COUNT - applies the bops_utilization function
-
-     """
-
-     WEIGHTS_SIZE = partial(weights_size_utilization)
-     ACTIVATION_MAXCUT_SIZE = partial(activation_maxcut_size_utilization)
-     ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
-     TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
-     BOPS_COUNT = partial(bops_utilization)
-
-     def __call__(self, *args):
-         return self.value(*args)
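
For readers following the refactor: the new MixedPrecisionRUHelper added in this version replaces the per-target metric functions and the MpRuMetric enum removed above. Below is a minimal usage sketch based only on the signatures visible in this diff; the graph, fw_info and fw_impl objects are assumed to come from MCT's core pipeline and are taken here as parameters, and the wrapper name lp_utilization_vectors is illustrative rather than part of the package.

from typing import Dict, List

import numpy as np

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
    RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import \
    MixedPrecisionRUHelper


def lp_utilization_vectors(graph, fw_info, fw_impl,
                           mp_cfg: List[int]) -> Dict[RUTarget, np.ndarray]:
    # One helper per graph; it wraps ResourceUtilizationCalculator internally (see __init__ above).
    helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
    # mp_cfg holds one candidate index per configurable node, in topological order.
    # WEIGHTS -> bytes per configurable node, ACTIVATION -> bytes per activation max-cut.
    return helper.compute_utilization({RUTarget.WEIGHTS, RUTarget.ACTIVATION}, mp_cfg)

As noted in the compute_utilization docstring, these arrays feed the linear-programming search directly, so their ordering must stay consistent across candidate configurations.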