mct-nightly 1.11.0.20240320.400-py3-none-any.whl → 1.11.0.20240322.404-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/METADATA +17 -9
  2. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/RECORD +152 -152
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +1 -1
  5. model_compression_toolkit/core/__init__.py +3 -3
  6. model_compression_toolkit/core/common/collectors/base_collector.py +2 -2
  7. model_compression_toolkit/core/common/data_loader.py +3 -3
  8. model_compression_toolkit/core/common/graph/base_graph.py +10 -13
  9. model_compression_toolkit/core/common/graph/base_node.py +3 -3
  10. model_compression_toolkit/core/common/graph/edge.py +2 -1
  11. model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py +2 -4
  12. model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +2 -2
  13. model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
  14. model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py +3 -5
  15. model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +1 -2
  16. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +24 -23
  17. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +110 -112
  18. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +114 -0
  19. model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_data.py → resource_utilization_tools/resource_utilization_data.py} +19 -19
  20. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +105 -0
  21. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +26 -0
  22. model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py} +61 -61
  23. model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +75 -71
  24. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +2 -4
  25. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +34 -34
  26. model_compression_toolkit/core/common/model_collector.py +2 -2
  27. model_compression_toolkit/core/common/network_editors/actions.py +3 -3
  28. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +12 -12
  29. model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +2 -2
  30. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +2 -2
  31. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -2
  32. model_compression_toolkit/core/common/pruning/memory_calculator.py +7 -7
  33. model_compression_toolkit/core/common/pruning/prune_graph.py +2 -3
  34. model_compression_toolkit/core/common/pruning/pruner.py +7 -7
  35. model_compression_toolkit/core/common/pruning/pruning_config.py +1 -1
  36. model_compression_toolkit/core/common/pruning/pruning_info.py +2 -2
  37. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +7 -4
  38. model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
  39. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +4 -2
  40. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +4 -6
  41. model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +2 -4
  42. model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -1
  43. model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +8 -6
  44. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +2 -2
  45. model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +4 -6
  46. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +4 -7
  47. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +3 -3
  48. model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -1
  49. model_compression_toolkit/core/common/substitutions/weights_activation_split.py +3 -3
  50. model_compression_toolkit/core/common/user_info.py +1 -1
  51. model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
  52. model_compression_toolkit/core/keras/back2framework/instance_builder.py +2 -2
  53. model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +4 -8
  54. model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -2
  55. model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +2 -2
  56. model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py +1 -1
  57. model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +1 -1
  58. model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
  59. model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +3 -3
  60. model_compression_toolkit/core/keras/hessian/trace_hessian_calculator_keras.py +1 -2
  61. model_compression_toolkit/core/keras/hessian/weights_trace_hessian_calculator_keras.py +5 -6
  62. model_compression_toolkit/core/keras/keras_implementation.py +1 -1
  63. model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -1
  64. model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +2 -4
  65. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +1 -1
  66. model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +7 -7
  67. model_compression_toolkit/core/keras/reader/common.py +2 -2
  68. model_compression_toolkit/core/keras/reader/node_builder.py +1 -1
  69. model_compression_toolkit/core/keras/{kpi_data_facade.py → resource_utilization_data_facade.py} +25 -24
  70. model_compression_toolkit/core/keras/tf_tensor_numpy.py +4 -2
  71. model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
  72. model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +6 -11
  73. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
  74. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py +1 -1
  75. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
  76. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +5 -5
  77. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +1 -1
  78. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +1 -1
  79. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
  80. model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +3 -7
  81. model_compression_toolkit/core/pytorch/hessian/trace_hessian_calculator_pytorch.py +1 -2
  82. model_compression_toolkit/core/pytorch/hessian/weights_trace_hessian_calculator_pytorch.py +2 -2
  83. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
  84. model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -2
  85. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +3 -3
  86. model_compression_toolkit/core/pytorch/pytorch_implementation.py +1 -1
  87. model_compression_toolkit/core/pytorch/reader/graph_builders.py +5 -7
  88. model_compression_toolkit/core/pytorch/reader/reader.py +2 -2
  89. model_compression_toolkit/core/pytorch/{kpi_data_facade.py → resource_utilization_data_facade.py} +24 -22
  90. model_compression_toolkit/core/pytorch/utils.py +3 -2
  91. model_compression_toolkit/core/runner.py +43 -42
  92. model_compression_toolkit/data_generation/common/data_generation.py +18 -18
  93. model_compression_toolkit/data_generation/common/model_info_exctractors.py +1 -1
  94. model_compression_toolkit/data_generation/keras/keras_data_generation.py +7 -10
  95. model_compression_toolkit/data_generation/keras/model_info_exctractors.py +2 -1
  96. model_compression_toolkit/data_generation/keras/optimization_functions/image_initilization.py +2 -1
  97. model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +2 -4
  98. model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +2 -1
  99. model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +8 -11
  100. model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +1 -1
  101. model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +2 -3
  102. model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +2 -3
  103. model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +8 -4
  104. model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +2 -2
  105. model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +7 -8
  106. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +19 -12
  107. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +2 -2
  108. model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +10 -11
  109. model_compression_toolkit/gptq/common/gptq_graph.py +3 -3
  110. model_compression_toolkit/gptq/common/gptq_training.py +14 -12
  111. model_compression_toolkit/gptq/keras/gptq_training.py +10 -8
  112. model_compression_toolkit/gptq/keras/graph_info.py +1 -1
  113. model_compression_toolkit/gptq/keras/quantization_facade.py +15 -17
  114. model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +4 -5
  115. model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +1 -2
  116. model_compression_toolkit/gptq/pytorch/gptq_training.py +10 -8
  117. model_compression_toolkit/gptq/pytorch/graph_info.py +1 -1
  118. model_compression_toolkit/gptq/pytorch/quantization_facade.py +11 -13
  119. model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +3 -4
  120. model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
  121. model_compression_toolkit/logger.py +1 -13
  122. model_compression_toolkit/pruning/keras/pruning_facade.py +11 -12
  123. model_compression_toolkit/pruning/pytorch/pruning_facade.py +11 -12
  124. model_compression_toolkit/ptq/keras/quantization_facade.py +13 -14
  125. model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -8
  126. model_compression_toolkit/qat/keras/quantization_facade.py +20 -22
  127. model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +2 -3
  128. model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +1 -1
  129. model_compression_toolkit/qat/pytorch/quantization_facade.py +12 -14
  130. model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +2 -3
  131. model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +1 -1
  132. model_compression_toolkit/target_platform_capabilities/immutable.py +4 -2
  133. model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +4 -8
  134. model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +1 -1
  135. model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +43 -8
  136. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +13 -18
  137. model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
  138. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py +2 -2
  139. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py +2 -1
  140. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +5 -5
  141. model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +1 -2
  142. model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +13 -13
  143. model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +14 -7
  144. model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +5 -5
  145. model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -3
  146. model_compression_toolkit/trainable_infrastructure/keras/load_model.py +4 -5
  147. model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +3 -4
  148. model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +3 -3
  149. model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py +0 -112
  150. model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py +0 -105
  151. model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py +0 -26
  152. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/LICENSE.md +0 -0
  153. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/WHEEL +0 -0
  154. {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/top_level.txt +0 -0
  155. /model_compression_toolkit/core/common/mixed_precision/{kpi_tools → resource_utilization_tools}/__init__.py +0 -0
@@ -23,9 +23,9 @@ from model_compression_toolkit.core.common.framework_implementation import Frame
23
23
  from model_compression_toolkit.core.common.graph.base_graph import Graph
24
24
  from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
25
25
  VirtualSplitWeightsNode, VirtualSplitActivationNode
26
- from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPITarget, KPI
27
- from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_aggregation_methods import MpKpiAggregation
28
- from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi_methods import MpKpiMetric
26
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
27
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
28
+ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
29
29
  from model_compression_toolkit.core.common.framework_info import FrameworkInfo
30
30
  from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
31
31
 
@@ -40,8 +40,8 @@ class MixedPrecisionSearchManager:
40
40
  fw_info: FrameworkInfo,
41
41
  fw_impl: FrameworkImplementation,
42
42
  sensitivity_evaluator: SensitivityEvaluation,
43
- kpi_functions: Dict[KPITarget, Tuple[MpKpiMetric, MpKpiAggregation]],
44
- target_kpi: KPI,
43
+ ru_functions: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]],
44
+ target_resource_utilization: ResourceUtilization,
45
45
  original_graph: Graph = None):
46
46
  """
47
47
 
@@ -51,10 +51,10 @@ class MixedPrecisionSearchManager:
51
51
  fw_impl: FrameworkImplementation object with specific framework methods implementation.
52
52
  sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
53
53
  a bit-width configuration for the MP model.
54
- kpi_functions: A dictionary with pairs of (MpKpiMethod, MpKpiAggregationMethod) mapping a KPITarget to
55
- a couple of kpi metric function and kpi aggregation function.
56
- target_kpi: Target KPI to bound our feasible solution space s.t the configuration does not violate it.
57
- original_graph: In case we have a search over a virtual graph (if we have BOPS KPI target), then this argument
54
+ ru_functions: A dictionary with pairs of (MpRuMethod, MpRuAggregationMethod) mapping a RUTarget to
55
+ a couple of resource utilization metric function and resource utilization aggregation function.
56
+ target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
57
+ original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument
58
58
  will contain the original graph (for config reconstruction purposes).
59
59
  """
60
60
 
@@ -66,12 +66,12 @@ class MixedPrecisionSearchManager:
66
66
  self.layer_to_bitwidth_mapping = self.get_search_space()
67
67
  self.compute_metric_fn = self.get_sensitivity_metric()
68
68
 
69
- self.compute_kpi_functions = kpi_functions
70
- self.target_kpi = target_kpi
71
- self.min_kpi_config = self.graph.get_min_candidates_config(fw_info)
72
- self.max_kpi_config = self.graph.get_max_candidates_config(fw_info)
73
- self.min_kpi = self.compute_min_kpis()
74
- self.non_conf_kpi_dict = self._non_configurable_nodes_kpi()
69
+ self.compute_ru_functions = ru_functions
70
+ self.target_resource_utilization = target_resource_utilization
71
+ self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
72
+ self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
73
+ self.min_ru = self.compute_min_ru()
74
+ self.non_conf_ru_dict = self._non_configurable_nodes_ru()
75
75
 
76
76
  self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
77
77
  original_graph=self.original_graph)
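The ru_functions argument above maps each RUTarget to a (metric, aggregation) pair, as the constructor docstring describes. A minimal sketch of such a mapping, using hypothetical placeholder callables rather than the toolkit's real MpRuMetric/MpRuAggregation implementations (the import path is taken from the renamed module shown in this diff):

import numpy as np
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget

def example_weights_metric(mp_cfg, graph, fw_info, fw_impl):
    # Placeholder: returns a per-node weights-utilization vector for the given mixed-precision config.
    return np.array([2.0, 4.0, 8.0])

def example_sum_aggregation(ru_vector, set_constraints=True):
    # Placeholder: aggregates the per-node vector into a single value (returned as a 1-element array).
    return np.array([np.sum(ru_vector)])

ru_functions = {RUTarget.WEIGHTS: (example_weights_metric, example_sum_aggregation)}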
@@ -106,112 +106,114 @@ class MixedPrecisionSearchManager:
106
106
 
107
107
  return self.sensitivity_evaluator.compute_metric
108
108
 
109
- def compute_min_kpis(self) -> Dict[KPITarget, np.ndarray]:
109
+ def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
110
110
  """
111
- Computes a KPIs vector with the values matching to the minimal mp configuration
111
+ Computes a resource utilization vector with the values matching to the minimal mp configuration
112
112
  (i.e., each node is configured with the quantization candidate that would give the minimal size of the
113
- node's KPI).
114
- The method computes the minimal KPIs vector for each kpi target.
113
+ node's resource utilization).
114
+ The method computes the minimal resource utilization vector for each target resource utilization.
115
115
 
116
- Returns: A dictionary mapping each kpi target to its respective minimal KPIs values.
116
+ Returns: A dictionary mapping each target resource utilization to its respective minimal
117
+ resource utilization values.
117
118
 
118
119
  """
119
- min_kpis = {}
120
- for kpi_target, kpi_fns in self.compute_kpi_functions.items():
121
- # kpi_fns is a pair of kpi computation method and kpi aggregation method (in this method we only need
122
- # the first one)
123
- min_kpis[kpi_target] = kpi_fns[0](self.min_kpi_config, self.graph, self.fw_info, self.fw_impl)
120
+ min_ru = {}
121
+ for ru_target, ru_fns in self.compute_ru_functions.items():
122
+ # ru_fns is a pair of resource utilization computation method and
123
+ # resource utilization aggregation method (in this method we only need the first one)
124
+ min_ru[ru_target] = ru_fns[0](self.min_ru_config, self.graph, self.fw_info, self.fw_impl)
124
125
 
125
- return min_kpis
126
+ return min_ru
126
127
 
127
- def compute_kpi_matrix(self, target: KPITarget) -> np.ndarray:
128
+ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
128
129
  """
129
- Computes and builds a KPIs matrix, to be used for the mixed-precision search problem formalization.
130
+ Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
130
131
  The matrix is constructed as follows (for a given target):
131
- - Each row represents the set of KPI values for a specific KPI measure (number of rows should be equal to the
132
- length of the output of the respective target compute_kpi function).
133
- - Each entry in a specific column represents the KPI value of a given configuration (single layer is configured
134
- with specific candidate, all other layer are at the minimal KPI configuration) for the KPI measure of the
135
- respective row.
132
+ - Each row represents the set of resource utilization values for a specific resource utilization
133
+ measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
134
+ - Each entry in a specific column represents the resource utilization value of a given configuration
135
+ (single layer is configured with specific candidate, all other layer are at the minimal resource
136
+ utilization configuration) for the resource utilization measure of the respective row.
136
137
 
137
138
  Args:
138
- target: The target for which the KPI is calculated (a KPITarget value).
139
+ target: The resource target for which the resource utilization is calculated (a RUTarget value).
139
140
 
140
- Returns: A KPI matrix.
141
+ Returns: A resource utilization matrix.
141
142
 
142
143
  """
143
- assert isinstance(target, KPITarget), f"{target} is not a valid KPI target"
144
+ assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
144
145
 
145
146
  configurable_sorted_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
146
147
 
147
- kpi_matrix = []
148
+ ru_matrix = []
148
149
  for c, c_n in enumerate(configurable_sorted_nodes):
149
150
  for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
150
- if candidate_idx == self.min_kpi_config[c]:
151
- # skip KPI computation for min configuration. Since we compute the difference from min_kpi it'll
151
+ if candidate_idx == self.min_ru_config[c]:
152
+ # skip ru computation for min configuration. Since we compute the difference from min_ru it'll
152
153
  # always be 0 for all entries in the results vector.
153
- candidate_kpis = np.zeros(shape=self.min_kpi[target].shape)
154
+ candidate_rus = np.zeros(shape=self.min_ru[target].shape)
154
155
  else:
155
- candidate_kpis = self.compute_candidate_relative_kpis(c, candidate_idx, target)
156
- kpi_matrix.append(np.asarray(candidate_kpis))
156
+ candidate_rus = self.compute_candidate_relative_ru(c, candidate_idx, target)
157
+ ru_matrix.append(np.asarray(candidate_rus))
157
158
 
158
- # We need to transpose the calculated kpi matrix to allow later multiplication with
159
+ # We need to transpose the calculated ru matrix to allow later multiplication with
159
160
  # the indicators' diagonal matrix.
160
161
  # We only move the first axis (num of configurations) to be last,
161
162
  # the remaining axes include the metric specific nodes (rows dimension of the new tensor)
162
- # and the kpi metric values (if they are non-scalars)
163
- np_kpi_matrix = np.array(kpi_matrix)
164
- return np.moveaxis(np_kpi_matrix, source=0, destination=len(np_kpi_matrix.shape) - 1)
165
-
166
- def compute_candidate_relative_kpis(self,
167
- conf_node_idx: int,
168
- candidate_idx: int,
169
- target: KPITarget) -> np.ndarray:
170
- """
171
- Computes a KPIs vector for a given candidates of a given configurable node, i.e., the matching KPI vector
172
- which is obtained by computing the given target's KPI function on a minimal configuration in which the given
163
+ # and the ru metric values (if they are non-scalars)
164
+ np_ru_matrix = np.array(ru_matrix)
165
+ return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
166
+
167
+ def compute_candidate_relative_ru(self,
168
+ conf_node_idx: int,
169
+ candidate_idx: int,
170
+ target: RUTarget) -> np.ndarray:
171
+ """
172
+ Computes a resource utilization vector for a given candidates of a given configurable node,
173
+ i.e., the matching resource utilization vector which is obtained by computing the given target's
174
+ resource utilization function on a minimal configuration in which the given
173
175
  layer's candidates is changed to the new given one.
174
- The result is normalized by subtracting the target's minimal KPIs vector.
176
+ The result is normalized by subtracting the target's minimal resource utilization vector.
175
177
 
176
178
  Args:
177
179
  conf_node_idx: The index of a node in a sorted configurable nodes list.
178
180
  candidate_idx: The index of a node's quantization configuration candidate.
179
- target: The target for which the KPI is calculated (a KPITarget value).
181
+ target: The target for which the resource utilization is calculated (a RUTarget value).
180
182
 
181
- Returns: Normalized node's KPIs vector
183
+ Returns: Normalized node's resource utilization vector
182
184
 
183
185
  """
184
- return self.compute_node_kpi_for_candidate(conf_node_idx, candidate_idx, target) - \
185
- self.get_min_target_kpi(target)
186
+ return self.compute_node_ru_for_candidate(conf_node_idx, candidate_idx, target) - \
187
+ self.get_min_target_resource_utilization(target)
186
188
 
187
- def get_min_target_kpi(self, target: KPITarget) -> np.ndarray:
189
+ def get_min_target_resource_utilization(self, target: RUTarget) -> np.ndarray:
188
190
  """
189
- Returns the minimal KPIs vector (pre-calculated on initialization) of a specific target.
191
+ Returns the minimal resource utilization vector (pre-calculated on initialization) of a specific target.
190
192
 
191
193
  Args:
192
- target: The target for which the KPI is calculated (a KPITarget value).
194
+ target: The target for which the resource utilization is calculated (a RUTarget value).
193
195
 
194
- Returns: Minimal KPIs vector.
196
+ Returns: Minimal resource utilization vector.
195
197
 
196
198
  """
197
- return self.min_kpi[target]
199
+ return self.min_ru[target]
198
200
 
199
- def compute_node_kpi_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: KPITarget) -> np.ndarray:
201
+ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
200
202
  """
201
- Computes a KPIs vector after replacing the given node's configuration candidate in the minimal
203
+ Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal
202
204
  target configuration with the given candidate index.
203
205
 
204
206
  Args:
205
207
  conf_node_idx: The index of a node in a sorted configurable nodes list.
206
- candidate_idx: Quantization config candidate to be used for the node's KPI computation.
207
- target: The target for which the KPI is calculated (a KPITarget value).
208
+ candidate_idx: Quantization config candidate to be used for the node's resource utilization computation.
209
+ target: The target for which the resource utilization is calculated (a RUTarget value).
208
210
 
209
- Returns: Node's KPIs vector.
211
+ Returns: Node's resource utilization vector.
210
212
 
211
213
  """
212
- return self.compute_kpi_functions[target][0](
214
+ return self.compute_ru_functions[target][0](
213
215
  self.replace_config_in_index(
214
- self.min_kpi_config,
216
+ self.min_ru_config,
215
217
  conf_node_idx,
216
218
  candidate_idx),
217
219
  self.graph,
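A toy illustration of the matrix construction described in compute_resource_utilization_matrix above: one relative-utilization vector per (node, candidate) configuration, stacked and then reordered so the configuration axis is last. The numbers are made up for illustration only:

import numpy as np

candidate_rus = [np.zeros(3),                 # node 0, minimal candidate: zeros, since values are relative to the minimal config
                 np.array([2.0, 0.0, 0.0]),   # node 0, wider candidate
                 np.array([0.0, 4.0, 0.0])]   # node 1, wider candidate
ru_matrix = np.array(candidate_rus)           # shape: (num_configurations, num_nodes)
ru_matrix = np.moveaxis(ru_matrix, source=0, destination=ru_matrix.ndim - 1)
print(ru_matrix.shape)                        # (3, 3): nodes on rows, configurations on columns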
@@ -236,58 +238,60 @@ class MixedPrecisionSearchManager:
236
238
  updated_cfg[idx] = value
237
239
  return updated_cfg
238
240
 
239
- def _non_configurable_nodes_kpi(self) -> Dict[KPITarget, np.ndarray]:
241
+ def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
240
242
  """
241
- Computes a KPI vector of all non-configurable nodes in the given graph for each of the KPI target.
243
+ Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
244
+ resource utilization targets.
242
245
 
243
- Returns: A mapping between a KPITarget and its non-configurable nodes' KPI vector.
246
+ Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
244
247
  """
245
248
 
246
- non_conf_kpi_dict = {}
247
- for target, kpi_value in self.target_kpi.get_kpi_dict().items():
248
- # Call for the KPI method of the given target - empty quantization configuration list is passed since we
249
+ non_conf_ru_dict = {}
250
+ for target, ru_value in self.target_resource_utilization.get_resource_utilization_dict().items():
251
+ # Call for the ru method of the given target - empty quantization configuration list is passed since we
249
252
  # compute for non-configurable nodes
250
- if target == KPITarget.BOPS:
251
- kpi_vector = None
253
+ if target == RUTarget.BOPS:
254
+ ru_vector = None
252
255
  else:
253
- kpi_vector = self.compute_kpi_functions[target][0]([], self.graph, self.fw_info, self.fw_impl)
256
+ ru_vector = self.compute_ru_functions[target][0]([], self.graph, self.fw_info, self.fw_impl)
254
257
 
255
- non_conf_kpi_dict[target] = kpi_vector
258
+ non_conf_ru_dict[target] = ru_vector
256
259
 
257
- return non_conf_kpi_dict
260
+ return non_conf_ru_dict
258
261
 
259
- def compute_kpi_for_config(self, config: List[int]) -> KPI:
262
+ def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
260
263
  """
261
- Computes the KPI values for a given mixed-precision configuration.
264
+ Computes the resource utilization values for a given mixed-precision configuration.
262
265
 
263
266
  Args:
264
267
  config: A mixed-precision configuration (list of candidates indices)
265
268
 
266
- Returns: A KPI object with the model's KPI values when quantized with the given config.
269
+ Returns: A ResourceUtilization object with the model's resource utilization values when quantized
270
+ with the given config.
267
271
 
268
272
  """
269
273
 
270
- kpis_dict = {}
274
+ ru_dict = {}
271
275
 
272
- for kpi_target, kpi_fns in self.compute_kpi_functions.items():
273
- # Passing False to kpi methods and aggregations to indicates that the computations
276
+ for ru_target, ru_fns in self.compute_ru_functions.items():
277
+ # Passing False to ru methods and aggregations to indicates that the computations
274
278
  # are not for constraints setting
275
- if kpi_target == KPITarget.BOPS:
276
- configurable_nodes_kpi_vector = kpi_fns[0](config, self.original_graph, self.fw_info, self.fw_impl, False)
279
+ if ru_target == RUTarget.BOPS:
280
+ configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl, False)
277
281
  else:
278
- configurable_nodes_kpi_vector = kpi_fns[0](config, self.original_graph, self.fw_info, self.fw_impl)
279
- non_configurable_nodes_kpi_vector = self.non_conf_kpi_dict.get(kpi_target)
280
- if non_configurable_nodes_kpi_vector is None or len(non_configurable_nodes_kpi_vector) == 0:
281
- aggr_kpi = self.compute_kpi_functions[kpi_target][1](configurable_nodes_kpi_vector, False)
282
+ configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl)
283
+ non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
284
+ if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
285
+ ru_ru = self.compute_ru_functions[ru_target][1](configurable_nodes_ru_vector, False)
282
286
  else:
283
- aggr_kpi = self.compute_kpi_functions[kpi_target][1](
284
- np.concatenate([configurable_nodes_kpi_vector, non_configurable_nodes_kpi_vector]), False)
287
+ ru_ru = self.compute_ru_functions[ru_target][1](
288
+ np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)
285
289
 
286
- kpis_dict[kpi_target] = aggr_kpi[0]
290
+ ru_dict[ru_target] = ru_ru[0]
287
291
 
288
- config_kpi = KPI()
289
- config_kpi.set_kpi_by_target(kpis_dict)
290
- return config_kpi
292
+ config_ru = ResourceUtilization()
293
+ config_ru.set_resource_utilization_by_target(ru_dict)
294
+ return config_ru
291
295
 
292
296
  def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
293
297
  """
@@ -317,7 +321,7 @@ class MixedPrecisionSearchManager:
317
321
  class ConfigReconstructionHelper:
318
322
  """
319
323
  A class to help reconstruct an original mixed-precision configuration from a virtual one,
320
- when running mixed-precision search with BOPS KPI.
324
+ when running mixed-precision search with BOPS utilization.
321
325
  It provides a reconstruct_config_from_virtual_graph which allows to translate a bit-width config of a virtual graph
322
326
  to a config of the original configurable nodes.
323
327
  """
@@ -375,8 +379,7 @@ class ConfigReconstructionHelper:
375
379
 
376
380
  if changed_virtual_nodes_idx is not None:
377
381
  if original_base_config is None:
378
- Logger.critical("Must provide a base original config in order to run config reconstruction for partial"
379
- "set of nodes.") # pragma: no cover
382
+ Logger.critical("To run config reconstruction for a partial set of nodes, a base original config must be provided.") # pragma: no cover
380
383
 
381
384
  updated_virtual_nodes = \
382
385
  [(idx, self.virtual_graph.get_configurable_sorted_nodes(self.fw_info)[idx]) for idx in changed_virtual_nodes_idx]
@@ -418,9 +421,7 @@ class ConfigReconstructionHelper:
418
421
  if isinstance(weights_node, VirtualSplitWeightsNode):
419
422
  self.get_activation_for_split_weights(weights_node, n, virtual_cfg_idx, virtual_mp_cfg)
420
423
  else:
421
- Logger.error(f"Virtual graph error - all weights nodes should be split to weights and activation nodes"
422
- f"in order to construct the virtual graph, but node {n.name} is not of type "
423
- f"VirtualSplitWeightsNode") # pragma: no cover
424
+ Logger.critical(f"Virtual graph construction error: Expected all weights nodes to be split into weights and activation nodes. Found node '{n.name}' not split as expected. Every weights node should correspond to a VirtualSplitWeightsNode type.") # pragma: no cover
424
425
 
425
426
  activation_node = n.original_activation_node
426
427
  if isinstance(activation_node, VirtualSplitActivationNode):
@@ -441,15 +442,13 @@ class ConfigReconstructionHelper:
441
442
  # It's ok, need to find the node's configuration
442
443
  self.get_activation_for_split_weights(n, n, virtual_cfg_idx, virtual_mp_cfg)
443
444
  else:
444
- Logger.error(f"Virtual graph error - a weights node is not composed with an activation node,"
445
- f"but its predecessor doesn't have multiple outputs.") # pragma: no cover
445
+ Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{n.name}' to have multiple outputs when not composed with an activation node.") # pragma: no cover
446
446
  elif isinstance(n, VirtualSplitActivationNode):
447
447
  self.get_weights_for_split_activation(n, n, virtual_cfg_idx, virtual_mp_cfg)
448
448
  else:
449
449
  # Node didn't change in virtual graph - candidates list is similar to original
450
450
  if n.name not in self.origin_sorted_conf_nodes_names:
451
- Logger.error(f"Node {n.name} appears in virtual graph as configurable, "
452
- f"but is not configurable in the original graph.") # pragma: no cover
451
+ Logger.critical(f"Configuration mismatch: Node '{n.name}' is configurable in the virtual graph but not in the original graph. Verify node configurations.") # pragma: no cover
453
452
  origin_idx = self.origin_sorted_conf_nodes_names.index(n.name)
454
453
  self.origin_node_idx_to_cfg[origin_idx] = virtual_cfg_idx
455
454
 
@@ -654,8 +653,7 @@ class ConfigReconstructionHelper:
654
653
  # It's ok, need to find the node's configuration
655
654
  self.retrieve_weights_activation_config(activation_node, weights_node, virtual_node, virtual_cfg_idx, virtual_mp_cfg)
656
655
  else:
657
- Logger.error(f"Virtual graph error - a weights node is not composed with an activation node,"
658
- f"but its predecessor doesn't have multiple outputs.") # pragma: no cover
656
+ Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{n.name}' to have multiple outputs when not composed with an activation node.") # pragma: no cover
659
657
 
660
658
  def update_config_at_original_idx(self, n: BaseNode, origin_cfg_idx: int):
661
659
  """
@@ -0,0 +1,114 @@
1
+ # Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ from enum import Enum
16
+ from typing import Dict, Any
17
+
18
+ import numpy as np
19
+
20
+
21
+ class RUTarget(Enum):
22
+ """
23
+ Targets for which we define Resource Utilization metrics for mixed-precision search.
24
+ For each target that we care to consider in a mixed-precision search, there should be defined a set of
25
+ resource utilization computation function, resource utilization aggregation function,
26
+ and resource utilization target (within a ResourceUtilization object).
27
+
28
+ Whenever adding a resource utilization metric to ResourceUtilization class we should add a matching target to this enum.
29
+
30
+ WEIGHTS - Weights memory ResourceUtilization metric.
31
+
32
+ ACTIVATION - Activation memory ResourceUtilization metric.
33
+
34
+ TOTAL - Total memory ResourceUtilization metric.
35
+
36
+ BOPS - Total Bit-Operations ResourceUtilization Metric.
37
+
38
+ """
39
+
40
+ WEIGHTS = 'weights'
41
+ ACTIVATION = 'activation'
42
+ TOTAL = 'total'
43
+ BOPS = 'bops'
44
+
45
+
46
+ class ResourceUtilization:
47
+ """
48
+ Class to represent measurements of performance.
49
+ """
50
+
51
+ def __init__(self,
52
+ weights_memory: float = np.inf,
53
+ activation_memory: float = np.inf,
54
+ total_memory: float = np.inf,
55
+ bops: float = np.inf):
56
+ """
57
+
58
+ Args:
59
+ weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not).
60
+ activation_memory: Memory of a model's activation in bytes, according to the given activation resource utilization metric.
61
+ total_memory: The sum of model's activation and weights memory in bytes, according to the given total resource utilization metric.
62
+ bops: The total bit-operations in the model.
63
+ """
64
+ self.weights_memory = weights_memory
65
+ self.activation_memory = activation_memory
66
+ self.total_memory = total_memory
67
+ self.bops = bops
68
+
69
+ def __repr__(self):
70
+ return f"Weights_memory: {self.weights_memory}, " \
71
+ f"Activation_memory: {self.activation_memory}, " \
72
+ f"Total_memory: {self.total_memory}, " \
73
+ f"BOPS: {self.bops}"
74
+
75
+ def get_resource_utilization_dict(self) -> Dict[RUTarget, float]:
76
+ """
77
+ Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target.
78
+ """
79
+ return {RUTarget.WEIGHTS: self.weights_memory,
80
+ RUTarget.ACTIVATION: self.activation_memory,
81
+ RUTarget.TOTAL: self.total_memory,
82
+ RUTarget.BOPS: self.bops}
83
+
84
+ def set_resource_utilization_by_target(self, ru_mapping: Dict[RUTarget, float]):
85
+ """
86
+ Setting a ResourceUtilization object values for each ResourceUtilization target in the given dictionary.
87
+
88
+ Args:
89
+ ru_mapping: A mapping from a RUTarget to a matching resource utilization value.
90
+
91
+ """
92
+ self.weights_memory = ru_mapping.get(RUTarget.WEIGHTS, np.inf)
93
+ self.activation_memory = ru_mapping.get(RUTarget.ACTIVATION, np.inf)
94
+ self.total_memory = ru_mapping.get(RUTarget.TOTAL, np.inf)
95
+ self.bops = ru_mapping.get(RUTarget.BOPS, np.inf)
96
+
97
+ def holds_constraints(self, ru: Any) -> bool:
98
+ """
99
+ Checks whether the given ResourceUtilization object holds a set of ResourceUtilization constraints defined by
100
+ the current ResourceUtilization object.
101
+
102
+ Args:
103
+ ru: A ResourceUtilization object to check if it holds the constraints.
104
+
105
+ Returns: True if all the given resource utilization values are not greater than the referenced resource utilization values.
106
+
107
+ """
108
+ if not isinstance(ru, ResourceUtilization):
109
+ return False
110
+
111
+ return ru.weights_memory <= self.weights_memory and \
112
+ ru.activation_memory <= self.activation_memory and \
113
+ ru.total_memory <= self.total_memory and \
114
+ ru.bops <= self.bops
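A brief usage sketch of the ResourceUtilization class added above; the import path follows the new module location in this diff, and the numbers are illustrative:

from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import (
    ResourceUtilization, RUTarget)

target_ru = ResourceUtilization(weights_memory=2_000_000, activation_memory=500_000)  # unset targets default to np.inf
measured_ru = ResourceUtilization(weights_memory=1_800_000, activation_memory=400_000,
                                  total_memory=2_200_000, bops=1e9)
print(target_ru.holds_constraints(measured_ru))                        # True: every measured value is within its bound
print(measured_ru.get_resource_utilization_dict()[RUTarget.WEIGHTS])   # 1800000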
@@ -12,26 +12,26 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- from typing import Callable, Any
16
15
  import numpy as np
16
+ from typing import Callable, Any
17
17
 
18
- from model_compression_toolkit.core import FrameworkInfo, KPI, CoreConfig
19
- from model_compression_toolkit.core.common import Graph
20
18
  from model_compression_toolkit.constants import FLOAT_BITWIDTH
19
+ from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig
20
+ from model_compression_toolkit.core.common import Graph
21
21
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
22
22
  from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
23
23
  from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
24
24
  from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
25
25
 
26
26
 
27
- def compute_kpi_data(in_model: Any,
28
- representative_data_gen: Callable,
29
- core_config: CoreConfig,
30
- tpc: TargetPlatformCapabilities,
31
- fw_info: FrameworkInfo,
32
- fw_impl: FrameworkImplementation) -> KPI:
27
+ def compute_resource_utilization_data(in_model: Any,
28
+ representative_data_gen: Callable,
29
+ core_config: CoreConfig,
30
+ tpc: TargetPlatformCapabilities,
31
+ fw_info: FrameworkInfo,
32
+ fw_impl: FrameworkImplementation) -> ResourceUtilization:
33
33
  """
34
- Compute KPI information that can be relevant for defining target KPI for mixed precision search.
34
+ Compute Resource Utilization information that can be relevant for defining target ResourceUtilization for mixed precision search.
35
35
  Calculates maximal activation tensor, sum of weights' parameters and total (sum of both).
36
36
 
37
37
  Args:
@@ -43,12 +43,12 @@ def compute_kpi_data(in_model: Any,
43
43
  fw_info: Information needed for quantization about the specific framework.
44
44
  fw_impl: FrameworkImplementation object with a specific framework methods implementation.
45
45
 
46
- Returns: A KPI object with the results.
46
+ Returns: A ResourceUtilization object with the results.
47
47
 
48
48
  """
49
49
 
50
- # We assume that the kpi_data API is used to compute the model KPI for mixed precision scenario,
51
- # so we run graph preparation under the assumption of enabled mixed precision.
50
+ # We assume that the resource_utilization_data API is used to compute the model resource utilization for
51
+ # mixed precision scenario, so we run graph preparation under the assumption of enabled mixed precision.
52
52
  transformed_graph = graph_preparation_runner(in_model,
53
53
  representative_data_gen,
54
54
  core_config.quantization_config,
@@ -65,17 +65,17 @@ def compute_kpi_data(in_model: Any,
65
65
  activation_output_sizes = compute_activation_output_sizes(graph=transformed_graph)
66
66
  max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
67
67
 
68
- # Compute total kpi - parameters sum + max activation tensor
68
+ # Compute total memory utilization - parameters sum + max activation tensor
69
69
  total_size = total_weights_params + max_activation_tensor_size
70
70
 
71
- # Compute BOPS kpi - total count of bit-operations for all configurable layers with kernel
71
+ # Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
72
72
  bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
73
73
  bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
74
74
 
75
- return KPI(weights_memory=total_weights_params,
76
- activation_memory=max_activation_tensor_size,
77
- total_memory=total_size,
78
- bops=bops_count)
75
+ return ResourceUtilization(weights_memory=total_weights_params,
76
+ activation_memory=max_activation_tensor_size,
77
+ total_memory=total_size,
78
+ bops=bops_count)
79
79
 
80
80
 
81
81
  def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> np.ndarray:
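A hedged sketch of how compute_resource_utilization_data assembles its result, per the hunk above: the weights-parameter sum, the largest activation tensor, and their sum populate the returned ResourceUtilization. The size values below are hypothetical stand-ins for the outputs of compute_nodes_weights_params and compute_activation_output_sizes:

import numpy as np
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization

weights_params = np.array([1_000, 9_000, 40_000])        # hypothetical per-node weight parameter counts
activation_output_sizes = np.array([150_000, 60_000])    # hypothetical per-node activation tensor sizes

total_weights_params = float(np.sum(weights_params))                  # 50000.0
max_activation_tensor_size = float(np.max(activation_output_sizes))   # 150000.0
total_size = total_weights_params + max_activation_tensor_size        # 200000.0

ru = ResourceUtilization(weights_memory=total_weights_params,
                         activation_memory=max_activation_tensor_size,
                         total_memory=total_size)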