mct-nightly 1.11.0.20240320.400__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/METADATA +17 -9
  2. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/RECORD +152 -152
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +1 -1
  5. model_compression_toolkit/core/__init__.py +3 -3
  6. model_compression_toolkit/core/common/collectors/base_collector.py +2 -2
  7. model_compression_toolkit/core/common/data_loader.py +3 -3
  8. model_compression_toolkit/core/common/graph/base_graph.py +10 -13
  9. model_compression_toolkit/core/common/graph/base_node.py +3 -3
  10. model_compression_toolkit/core/common/graph/edge.py +2 -1
  11. model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py +2 -4
  12. model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +2 -2
  13. model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
  14. model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py +3 -5
  15. model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +1 -2
  16. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +24 -23
  17. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +110 -112
  18. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +114 -0
  19. model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_data.py → resource_utilization_tools/resource_utilization_data.py} +19 -19
  20. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +105 -0
  21. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +26 -0
  22. model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py} +61 -61
  23. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +75 -71
  24. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +2 -4
  25. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +34 -34
  26. model_compression_toolkit/core/common/model_collector.py +2 -2
  27. model_compression_toolkit/core/common/network_editors/actions.py +3 -3
  28. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +12 -12
  29. model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +2 -2
  30. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +2 -2
  31. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -2
  32. model_compression_toolkit/core/common/pruning/memory_calculator.py +7 -7
  33. model_compression_toolkit/core/common/pruning/prune_graph.py +2 -3
  34. model_compression_toolkit/core/common/pruning/pruner.py +7 -7
  35. model_compression_toolkit/core/common/pruning/pruning_config.py +1 -1
  36. model_compression_toolkit/core/common/pruning/pruning_info.py +2 -2
  37. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +7 -4
  38. model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
  39. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +4 -2
  40. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +4 -6
  41. model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +2 -4
  42. model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -1
  43. model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +8 -6
  44. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +2 -2
  45. model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +4 -6
  46. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +4 -7
  47. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +3 -3
  48. model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -1
  49. model_compression_toolkit/core/common/substitutions/weights_activation_split.py +3 -3
  50. model_compression_toolkit/core/common/user_info.py +1 -1
  51. model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
  52. model_compression_toolkit/core/keras/back2framework/instance_builder.py +2 -2
  53. model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +4 -8
  54. model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -2
  55. model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +2 -2
  56. model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py +1 -1
  57. model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +1 -1
  58. model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
  59. model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +3 -3
  60. model_compression_toolkit/core/keras/hessian/trace_hessian_calculator_keras.py +1 -2
  61. model_compression_toolkit/core/keras/hessian/weights_trace_hessian_calculator_keras.py +5 -6
  62. model_compression_toolkit/core/keras/keras_implementation.py +1 -1
  63. model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -1
  64. model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +2 -4
  65. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +1 -1
  66. model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +7 -7
  67. model_compression_toolkit/core/keras/reader/common.py +2 -2
  68. model_compression_toolkit/core/keras/reader/node_builder.py +1 -1
  69. model_compression_toolkit/core/keras/{kpi_data_facade.py → resource_utilization_data_facade.py} +25 -24
  70. model_compression_toolkit/core/keras/tf_tensor_numpy.py +4 -2
  71. model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
  72. model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +6 -11
  73. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
  74. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py +1 -1
  75. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
  76. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +5 -5
  77. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +1 -1
  78. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +1 -1
  79. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
  80. model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +3 -7
  81. model_compression_toolkit/core/pytorch/hessian/trace_hessian_calculator_pytorch.py +1 -2
  82. model_compression_toolkit/core/pytorch/hessian/weights_trace_hessian_calculator_pytorch.py +2 -2
  83. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
  84. model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -2
  85. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +3 -3
  86. model_compression_toolkit/core/pytorch/pytorch_implementation.py +1 -1
  87. model_compression_toolkit/core/pytorch/reader/graph_builders.py +5 -7
  88. model_compression_toolkit/core/pytorch/reader/reader.py +2 -2
  89. model_compression_toolkit/core/pytorch/{kpi_data_facade.py → resource_utilization_data_facade.py} +24 -22
  90. model_compression_toolkit/core/pytorch/utils.py +3 -2
  91. model_compression_toolkit/core/runner.py +43 -42
  92. model_compression_toolkit/data_generation/common/data_generation.py +18 -18
  93. model_compression_toolkit/data_generation/common/model_info_exctractors.py +1 -1
  94. model_compression_toolkit/data_generation/keras/keras_data_generation.py +7 -10
  95. model_compression_toolkit/data_generation/keras/model_info_exctractors.py +2 -1
  96. model_compression_toolkit/data_generation/keras/optimization_functions/image_initilization.py +2 -1
  97. model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +2 -4
  98. model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +2 -1
  99. model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +8 -11
  100. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +1 -1
  101. model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +2 -3
  102. model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +2 -3
  103. model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +8 -4
  104. model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +2 -2
  105. model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +7 -8
  106. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +19 -12
  107. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +2 -2
  108. model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +10 -11
  109. model_compression_toolkit/gptq/common/gptq_graph.py +3 -3
  110. model_compression_toolkit/gptq/common/gptq_training.py +14 -12
  111. model_compression_toolkit/gptq/keras/gptq_training.py +10 -8
  112. model_compression_toolkit/gptq/keras/graph_info.py +1 -1
  113. model_compression_toolkit/gptq/keras/quantization_facade.py +15 -17
  114. model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +4 -5
  115. model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +1 -2
  116. model_compression_toolkit/gptq/pytorch/gptq_training.py +10 -8
  117. model_compression_toolkit/gptq/pytorch/graph_info.py +1 -1
  118. model_compression_toolkit/gptq/pytorch/quantization_facade.py +11 -13
  119. model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +3 -4
  120. model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
  121. model_compression_toolkit/logger.py +1 -13
  122. model_compression_toolkit/pruning/keras/pruning_facade.py +11 -12
  123. model_compression_toolkit/pruning/pytorch/pruning_facade.py +11 -12
  124. model_compression_toolkit/ptq/keras/quantization_facade.py +13 -14
  125. model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -8
  126. model_compression_toolkit/qat/keras/quantization_facade.py +20 -22
  127. model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +2 -3
  128. model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +1 -1
  129. model_compression_toolkit/qat/pytorch/quantization_facade.py +12 -14
  130. model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +2 -3
  131. model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +1 -1
  132. model_compression_toolkit/target_platform_capabilities/immutable.py +4 -2
  133. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +4 -8
  134. model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +1 -1
  135. model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +43 -8
  136. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +13 -18
  137. model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
  138. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py +2 -2
  139. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py +2 -1
  140. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +5 -5
  141. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +1 -2
  142. model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +13 -13
  143. model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +14 -7
  144. model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +5 -5
  145. model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -3
  146. model_compression_toolkit/trainable_infrastructure/keras/load_model.py +4 -5
  147. model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +3 -4
  148. model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +3 -3
  149. model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py +0 -112
  150. model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py +0 -105
  151. model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py +0 -26
  152. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/LICENSE.md +0 -0
  153. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/WHEEL +0 -0
  154. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/top_level.txt +0 -0
  155. /model_compression_toolkit/core/common/mixed_precision/{kpi_tools → resource_utilization_tools}/__init__.py +0 -0
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py (new file)
@@ -0,0 +1,105 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import copy
+from enum import Enum
+from functools import partial
+from typing import List, Any
+import numpy as np
+
+from pulp import lpSum
+
+
+def sum_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[Any]:
+    """
+    Aggregates resource utilization vector to a single resource utilization measure by summing all values.
+
+    Args:
+        ru_vector: A vector with nodes' resource utilization values.
+        set_constraints: A flag for utilizing the method for resource utilization computation of a
+            given config not for LP formalization purposes.
+
+    Returns: A list with an lpSum object for lp problem definition with the vector's sum.
+
+    """
+    if not set_constraints:
+        return [0] if len(ru_vector) == 0 else [sum(ru_vector)]
+    return [lpSum(ru_vector)]
+
+
+def max_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[float]:
+    """
+    Aggregates resource utilization vector to allow max constraint in the linear programming problem formalization.
+    In order to do so, we need to define a separate constraint on each value in the resource utilization vector,
+    to be bounded by the target resource utilization.
+
+    Args:
+        ru_vector: A vector with nodes' resource utilization values.
+        set_constraints: A flag for utilizing the method for resource utilization computation of a
+            given config not for LP formalization purposes.
+
+    Returns: A list with the vector's values, to be used to define max constraint
+        in the linear programming problem formalization.
+
+    """
+    if not set_constraints:
+        return [0] if len(ru_vector) == 0 else [max(ru_vector)]
+    return [ru for ru in ru_vector]
+
+
+def total_ru(ru_tensor: np.ndarray, set_constraints: bool = True) -> List[float]:
+    """
+    Aggregates resource utilization vector to allow weights and activation total utilization constraint in the linear programming
+    problem formalization. In order to do so, we need to define a separate constraint on each activation memory utilization value in
+    the resource utilization vector, combined with the sum weights memory utilization.
+    Note that the given ru_tensor should contain weights and activation utilization values in each entry.
+
+    Args:
+        ru_tensor: A tensor with nodes' resource utilization values for weights and activation.
+        set_constraints: A flag for utilizing the method for resource utilization computation of a
+            given config not for LP formalization purposes.
+
+    Returns: A list with lpSum objects, to be used to define total constraint
+        in the linear programming problem formalization.
+
+    """
+    if not set_constraints:
+        weights_ru = sum([ru[0] for ru in ru_tensor])
+        activation_ru = max([ru[1] for ru in ru_tensor])
+        return [weights_ru + activation_ru]
+
+    weights_ru = lpSum([ru[0] for ru in ru_tensor])
+    total_ru = [weights_ru + activation_ru for _, activation_ru in ru_tensor]
+
+    return total_ru
+
+
+class MpRuAggregation(Enum):
+    """
+    Defines resource utilization aggregation functions that can be used to compute final resource utilization metric.
+    The enum values can be used to call a function on a set of arguments.
+
+    SUM - applies the sum_ru_values function
+
+    MAX - applies the max_ru_values function
+
+    TOTAL - applies the total_ru function
+
+    """
+    SUM = partial(sum_ru_values)
+    MAX = partial(max_ru_values)
+    TOTAL = partial(total_ru)
+
+    def __call__(self, *args):
+        return self.value(*args)
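
The two execution modes above (plain aggregation versus building LP expressions) are easier to see with a small usage sketch. This sketch is illustrative only and is not part of the package diff: the utilization values and variable names are made up, and it assumes numpy and pulp are installed, as the new module imports both.

import numpy as np
from pulp import LpVariable

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation

# Plain-number mode (set_constraints=False): fold a concrete per-node utilization vector.
ru_vector = np.array([4.0, 2.0, 6.0])            # hypothetical per-node memory values
summed = MpRuAggregation.SUM(ru_vector, False)   # single-element list holding the sum (12.0)
maxed = MpRuAggregation.MAX(ru_vector, False)    # single-element list holding the max (6.0)

# LP mode (default): keep symbolic pulp expressions so the search can turn them into constraints.
lp_vars = np.array([LpVariable("n0_ru"), LpVariable("n1_ru")])
sum_expr = MpRuAggregation.SUM(lp_vars)          # [lpSum(...)], one expression bounding the total
max_exprs = MpRuAggregation.MAX(lp_vars)         # one entry per node, each bounded separately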
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py (new file)
@@ -0,0 +1,26 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
+
+
+# When adding a RUTarget that we want to consider in our mp search,
+# a matching pair of resource_utilization_tools computation function and a resource_utilization_tools
+# aggregation function should be added to this dictionary
+ru_functions_mapping = {RUTarget.WEIGHTS: (MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
+                        RUTarget.ACTIVATION: (MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
+                        RUTarget.TOTAL: (MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
+                        RUTarget.BOPS: (MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM)}
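
A hedged sketch of how this mapping is meant to be consumed (the actual lookup happens inside the mixed-precision search code, e.g. mixed_precision_search_manager.py, which is changed in this release but not shown here): each RUTarget resolves to a metric function that produces a per-node utilization vector and an aggregation function that folds that vector into a value or into LP constraints.

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_functions_mapping import ru_functions_mapping

# Resolve the (metric, aggregation) pair for one target, e.g. weights memory.
metric_fn, aggregation_fn = ru_functions_mapping[RUTarget.WEIGHTS]

# Hypothetical call chain, assuming mp_cfg, graph, fw_info and fw_impl are already available:
# per_node_ru = metric_fn(mp_cfg, graph, fw_info, fw_impl)   # np.ndarray with one entry per node
# aggregated = aggregation_fn(per_node_ru, False)            # aggregated value (non-LP mode)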
model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py}
@@ -28,14 +28,14 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
 from model_compression_toolkit.logger import Logger
 
 
-def weights_size_kpi(mp_cfg: List[int],
-                     graph: Graph,
-                     fw_info: FrameworkInfo,
-                     fw_impl: FrameworkImplementation) -> np.ndarray:
+def weights_size_utilization(mp_cfg: List[int],
+                             graph: Graph,
+                             fw_info: FrameworkInfo,
+                             fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective weights' memory size for the given weight configurable node,
+    Computes a resource utilization vector with the respective weights' memory size for the given weight configurable node,
     according to the given mixed-precision configuration.
-    If an empty configuration is given, then computes KPI vector for non-configurable nodes.
+    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
 
     Args:
         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
@@ -52,7 +52,7 @@ def weights_size_kpi(mp_cfg: List[int],
     weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
 
     if len(mp_cfg) == 0:
-        # Computing non-configurable nodes KPI
+        # Computing non-configurable nodes resource utilization
         # TODO: when enabling multiple attribute quantization by default (currently,
         # only kernel quantization is enabled) we should include other attributes memory in the sum of all
         # weights memory (when quantized to their default 8-bit, non-configurable).
@@ -71,7 +71,8 @@ def weights_size_kpi(mp_cfg: List[int],
                 node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
                 weights_memory.append(node_weights_memory_in_bytes)
     else:
-        # Go over configurable all nodes that should be taken into consideration when computing the weights KPI.
+        # Go over configurable all nodes that should be taken into consideration when computing the weights
+        # resource utilization.
         for n in graph.get_sorted_weights_configurable_nodes(fw_info):
             # Only nodes with kernel op can be considered configurable
             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
@@ -86,14 +87,14 @@ def weights_size_kpi(mp_cfg: List[int],
     return np.array(weights_memory)
 
 
-def activation_output_size_kpi(mp_cfg: List[int],
-                               graph: Graph,
-                               fw_info: FrameworkInfo,
-                               fw_impl: FrameworkImplementation) -> np.ndarray:
+def activation_output_size_utilization(mp_cfg: List[int],
+                                       graph: Graph,
+                                       fw_info: FrameworkInfo,
+                                       fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective output memory size for each activation configurable node,
+    Computes a resource utilization vector with the respective output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
-    If an empty configuration is given, then computes KPI vector for non-configurable nodes.
+    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
 
     Args:
         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
@@ -111,7 +112,7 @@ def activation_output_size_kpi(mp_cfg: List[int],
     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
 
     if len(mp_cfg) == 0:
-        # Computing non-configurable nodes KPI
+        # Computing non-configurable nodes resource utilization
         for n in graph.nodes:
             non_configurable_node = n.name not in activation_mp_nodes \
                                     and n.has_activation_quantization_enabled_candidate() \
@@ -122,7 +123,7 @@ def activation_output_size_kpi(mp_cfg: List[int],
                 node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
                 activation_memory.append(node_activation_memory_in_bytes)
     else:
-        # Go over all nodes that should be taken into consideration when computing the weights KPI.
+        # Go over all nodes that should be taken into consideration when computing the weights memory utilization.
         for n in graph.get_sorted_activation_configurable_nodes():
             node_idx = mp_nodes.index(n.name)
             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
@@ -135,14 +136,14 @@ def activation_output_size_kpi(mp_cfg: List[int],
     return np.array(activation_memory)
 
 
-def total_weights_activation_kpi(mp_cfg: List[int],
-                                 graph: Graph,
-                                 fw_info: FrameworkInfo,
-                                 fw_impl: FrameworkImplementation) -> np.ndarray:
+def total_weights_activation_utilization(mp_cfg: List[int],
+                                         graph: Graph,
+                                         fw_info: FrameworkInfo,
+                                         fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes KPIs tensor with the respective weights size and output memory size for each activation configurable node,
+    Computes resource utilization tensor with the respective weights size and output memory size for each activation configurable node,
     according to the given mixed-precision configuration.
-    If an empty configuration is given, then computes KPI vector for non-configurable nodes.
+    If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
 
     Args:
         mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
@@ -160,15 +161,15 @@ def total_weights_activation_kpi(mp_cfg: List[int],
     activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
 
     if len(mp_cfg) == 0:
-        # Computing non-configurable nodes KPI
+        # Computing non-configurable nodes utilization
         for n in graph.nodes:
 
             non_configurable = False
             node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0
 
             # Non-configurable Weights
-            # TODO: currently considering only kernel attributes in weights KPI. When enabling multi-attribute
-            # quantization we need to modify this method to count all attributes.
+            # TODO: currently considering only kernel attributes in weights memory utilization.
+            # When enabling multi-attribute quantization we need to modify this method to count all attributes.
             kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
             if kernel_attr is not None:
                 is_non_configurable_weights = n.name not in weights_mp_nodes and \
@@ -196,9 +197,9 @@ def total_weights_activation_kpi(mp_cfg: List[int],
                     np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
     else:
         # Go over all nodes that should be taken into consideration when computing the weights or
-        # activation KPI (all configurable nodes).
+        # activation memory utilization (all configurable nodes).
         for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)):
-            # TODO: currently considering only kernel attributes in weights KPI. When enabling multi-attribute
+            # TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute
             # quantization we need to modify this method to count all attributes.
 
             node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
@@ -222,13 +223,13 @@ def total_weights_activation_kpi(mp_cfg: List[int],
     return np.array(weights_activation_memory)
 
 
-def bops_kpi(mp_cfg: List[int],
-             graph: Graph,
-             fw_info: FrameworkInfo,
-             fw_impl: FrameworkImplementation,
-             set_constraints: bool = True) -> np.ndarray:
+def bops_utilization(mp_cfg: List[int],
+                     graph: Graph,
+                     fw_info: FrameworkInfo,
+                     fw_impl: FrameworkImplementation,
+                     set_constraints: bool = True) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective bit-operations (BOPS) count for each configurable node,
+    Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
     according to the given mixed-precision configuration of a virtual graph with composed nodes.
 
     Args:
@@ -236,7 +237,7 @@ def bops_kpi(mp_cfg: List[int],
         graph: Graph object.
         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
         fw_impl: FrameworkImplementation object with specific framework methods implementation.
-        set_constraints: A flag for utilizing the method for KPI computation of a
+        set_constraints: A flag for utilizing the method for resource utilization computation of a
             given config not for LP formalization purposes.
 
     Returns: A vector of node's BOPS count.
@@ -245,12 +246,12 @@ def bops_kpi(mp_cfg: List[int],
     """
 
     if not set_constraints:
-        return _bops_kpi(mp_cfg,
-                         graph,
-                         fw_info,
-                         fw_impl)
+        return _bops_utilization(mp_cfg,
+                                 graph,
+                                 fw_info,
+                                 fw_impl)
 
-    # BOPs KPI method considers non-configurable nodes, therefore, it doesn't need separate implementation
+    # BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
     # for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
 
     virtual_bops_nodes = [n for n in graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
@@ -261,12 +262,12 @@ def bops_kpi(mp_cfg: List[int],
     return np.array(bops)
 
 
-def _bops_kpi(mp_cfg: List[int],
-              graph: Graph,
-              fw_info: FrameworkInfo,
-              fw_impl: FrameworkImplementation) -> np.ndarray:
+def _bops_utilization(mp_cfg: List[int],
+                      graph: Graph,
+                      fw_info: FrameworkInfo,
+                      fw_impl: FrameworkImplementation) -> np.ndarray:
     """
-    Computes a KPIs vector with the respective bit-operations (BOPS) count for each configurable node,
+    Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
     according to the given mixed-precision configuration of an original graph.
 
     Args:
@@ -281,19 +282,18 @@ def _bops_kpi(mp_cfg: List[int],
 
     mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
 
-    # Go over all nodes that should be taken into consideration when computing the BOPS KPI.
+    # Go over all nodes that should be taken into consideration when computing the BOPS utilization.
     bops = []
     for n in graph.get_topo_sorted_nodes():
         if n.has_kernel_weight_to_quantize(fw_info):
             # If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
             incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
             if len(incoming_edges) != 1:
-                Logger.critical(f"Can't compute BOPS metric for node {n.name} with multiple inputs.")  # pragma: no cover
-
+                Logger.critical(f"Unable to compute BOPS metric for node {n.name} due to multiple inputs.")  # pragma: no cover
             input_activation_node = incoming_edges[0].source_node
             if len(graph.out_edges(input_activation_node)) > 1:
                 # In the case where the activation node has multiple outgoing edges
-                # we don't consider this edge in the BOPS KPI calculation
+                # we don't consider this edge in the BOPS utilization calculation
                 continue
 
             input_activation_node_cfg = input_activation_node.candidates_quantization_cfg[_get_node_cfg_idx(input_activation_node, mp_cfg, mp_nodes)]
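
For context on the quantity accumulated above: bit-operations (BOPS) are conventionally counted as the number of MACs multiplied by the weight bit-width and the activation bit-width of the chosen candidate. A tiny worked example under that convention; the layer size and bit-widths below are made up and not taken from this diff.

# Hypothetical layer: 1,000,000 MAC operations, with a mixed-precision candidate
# that quantizes weights to 4 bits and the input activation to 8 bits.
mac_count = 1_000_000
weight_nbits, activation_nbits = 4, 8

bops = mac_count * weight_nbits * activation_nbits
print(bops)  # 32000000 bit-operations for this candidate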
@@ -338,7 +338,7 @@ def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nod
 
 def _get_origin_weights_node(n: BaseNode) -> BaseNode:
     """
-    In case we run a KPI computation on a virtual graph,
+    In case we run a resource utilization computation on a virtual graph,
     this method is used to retrieve the original node out of a virtual weights node,
 
     Args:
@@ -358,7 +358,7 @@ def _get_origin_weights_node(n: BaseNode) -> BaseNode:
 
 def _get_origin_activation_node(n: BaseNode) -> BaseNode:
     """
-    In case we run a KPI computation on a virtual graph,
+    In case we run a resource utilization computation on a virtual graph,
     this method is used to retrieve the original node out of a virtual activation node,
 
     Args:
@@ -417,25 +417,25 @@ def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float:
     return node_output_size * node_nbits / BITS_TO_BYTES
 
 
-class MpKpiMetric(Enum):
+class MpRuMetric(Enum):
     """
-    Defines kpi computation functions that can be used to compute KPI for a given target for a given mp config.
-    The enum values can be used to call a function on a set of arguments.
+    Defines resource utilization computation functions that can be used to compute bops_utilization for a given target
+    for a given mp config. The enum values can be used to call a function on a set of arguments.
 
-    WEIGHTS_SIZE - applies the weights_size_kpi function
+    WEIGHTS_SIZE - applies the weights_size_utilization function
 
-    ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_kpi function
+    ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
 
-    TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_kpi function
+    TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
 
-    BOPS_COUNT - applies the bops_kpi function
+    BOPS_COUNT - applies the bops_utilization function
 
     """
 
-    WEIGHTS_SIZE = partial(weights_size_kpi)
-    ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_kpi)
-    TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_kpi)
-    BOPS_COUNT = partial(bops_kpi)
+    WEIGHTS_SIZE = partial(weights_size_utilization)
+    ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
+    TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
+    BOPS_COUNT = partial(bops_utilization)
 
     def __call__(self, *args):
         return self.value(*args)
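
Both new enums (MpRuAggregation earlier and MpRuMetric here) use the same small pattern: each function is wrapped in functools.partial so it is stored as an enum member rather than being absorbed as a method, and __call__ forwards the arguments to it. A standalone illustration with hypothetical names (Op, double), separate from the package code:

from enum import Enum
from functools import partial


def double(x: int) -> int:
    return 2 * x


class Op(Enum):
    # A bare function assigned here would become a method of the enum, not a member;
    # wrapping it in partial keeps it as a member value that is still callable.
    DOUBLE = partial(double)

    def __call__(self, *args):
        return self.value(*args)


assert Op.DOUBLE(3) == 6  # dispatches to double(3) via __call__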