mct-nightly 1.11.0.20240321.357__py3-none-any.whl → 1.11.0.20240323.408__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240323.408.dist-info}/METADATA +17 -9
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240323.408.dist-info}/RECORD +152 -152
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +1 -1
- model_compression_toolkit/core/__init__.py +3 -3
- model_compression_toolkit/core/common/collectors/base_collector.py +2 -2
- model_compression_toolkit/core/common/data_loader.py +3 -3
- model_compression_toolkit/core/common/graph/base_graph.py +10 -13
- model_compression_toolkit/core/common/graph/base_node.py +3 -3
- model_compression_toolkit/core/common/graph/edge.py +2 -1
- model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py +2 -4
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +2 -2
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
- model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +1 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +24 -23
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +110 -112
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +114 -0
- model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_data.py → resource_utilization_tools/resource_utilization_data.py} +19 -19
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +105 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +26 -0
- model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py} +61 -61
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +75 -71
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +2 -4
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +34 -34
- model_compression_toolkit/core/common/model_collector.py +2 -2
- model_compression_toolkit/core/common/network_editors/actions.py +3 -3
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +12 -12
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +2 -2
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +2 -2
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +7 -7
- model_compression_toolkit/core/common/pruning/prune_graph.py +2 -3
- model_compression_toolkit/core/common/pruning/pruner.py +7 -7
- model_compression_toolkit/core/common/pruning/pruning_config.py +1 -1
- model_compression_toolkit/core/common/pruning/pruning_info.py +2 -2
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +7 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +4 -2
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +4 -6
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +2 -4
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -1
- model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +8 -6
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +4 -6
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +4 -7
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +3 -3
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +3 -3
- model_compression_toolkit/core/common/user_info.py +1 -1
- model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/keras/back2framework/instance_builder.py +2 -2
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +4 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +2 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +3 -3
- model_compression_toolkit/core/keras/hessian/trace_hessian_calculator_keras.py +1 -2
- model_compression_toolkit/core/keras/hessian/weights_trace_hessian_calculator_keras.py +5 -6
- model_compression_toolkit/core/keras/keras_implementation.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +2 -4
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +1 -1
- model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +7 -7
- model_compression_toolkit/core/keras/reader/common.py +2 -2
- model_compression_toolkit/core/keras/reader/node_builder.py +1 -1
- model_compression_toolkit/core/keras/{kpi_data_facade.py → resource_utilization_data_facade.py} +25 -24
- model_compression_toolkit/core/keras/tf_tensor_numpy.py +4 -2
- model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +6 -11
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +3 -7
- model_compression_toolkit/core/pytorch/hessian/trace_hessian_calculator_pytorch.py +1 -2
- model_compression_toolkit/core/pytorch/hessian/weights_trace_hessian_calculator_pytorch.py +2 -2
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +3 -3
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +1 -1
- model_compression_toolkit/core/pytorch/reader/graph_builders.py +5 -7
- model_compression_toolkit/core/pytorch/reader/reader.py +2 -2
- model_compression_toolkit/core/pytorch/{kpi_data_facade.py → resource_utilization_data_facade.py} +24 -22
- model_compression_toolkit/core/pytorch/utils.py +3 -2
- model_compression_toolkit/core/runner.py +43 -42
- model_compression_toolkit/data_generation/common/data_generation.py +18 -18
- model_compression_toolkit/data_generation/common/model_info_exctractors.py +1 -1
- model_compression_toolkit/data_generation/keras/keras_data_generation.py +7 -10
- model_compression_toolkit/data_generation/keras/model_info_exctractors.py +2 -1
- model_compression_toolkit/data_generation/keras/optimization_functions/image_initilization.py +2 -1
- model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +2 -4
- model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +2 -1
- model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +8 -11
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +1 -1
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +2 -3
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +2 -3
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +8 -4
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +2 -2
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +7 -8
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +19 -12
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +2 -2
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +10 -11
- model_compression_toolkit/gptq/common/gptq_graph.py +3 -3
- model_compression_toolkit/gptq/common/gptq_training.py +14 -12
- model_compression_toolkit/gptq/keras/gptq_training.py +10 -8
- model_compression_toolkit/gptq/keras/graph_info.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +15 -17
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +4 -5
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/gptq/pytorch/gptq_training.py +10 -8
- model_compression_toolkit/gptq/pytorch/graph_info.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +11 -13
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +3 -4
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/logger.py +1 -13
- model_compression_toolkit/pruning/keras/pruning_facade.py +11 -12
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +11 -12
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -14
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -8
- model_compression_toolkit/qat/keras/quantization_facade.py +20 -22
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +2 -3
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +12 -14
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +2 -3
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +1 -1
- model_compression_toolkit/target_platform_capabilities/immutable.py +4 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +4 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +1 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +43 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +13 -18
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py +2 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py +2 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +5 -5
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +13 -13
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +14 -7
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +5 -5
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -3
- model_compression_toolkit/trainable_infrastructure/keras/load_model.py +4 -5
- model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +3 -4
- model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py +0 -112
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py +0 -26
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240323.408.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240323.408.dist-info}/WHEEL +0 -0
- {mct_nightly-1.11.0.20240321.357.dist-info → mct_nightly-1.11.0.20240323.408.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/common/mixed_precision/{kpi_tools → resource_utilization_tools}/__init__.py +0 -0
|
@@ -23,9 +23,9 @@ from model_compression_toolkit.core.common.framework_implementation import Frame
|
|
|
23
23
|
from model_compression_toolkit.core.common.graph.base_graph import Graph
|
|
24
24
|
from model_compression_toolkit.core.common.graph.virtual_activation_weights_node import VirtualActivationWeightsNode, \
|
|
25
25
|
VirtualSplitWeightsNode, VirtualSplitActivationNode
|
|
26
|
-
from model_compression_toolkit.core.common.mixed_precision.
|
|
27
|
-
from model_compression_toolkit.core.common.mixed_precision.
|
|
28
|
-
from model_compression_toolkit.core.common.mixed_precision.
|
|
26
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget, ResourceUtilization
|
|
27
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
|
|
28
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
|
|
29
29
|
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
|
|
30
30
|
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
|
|
31
31
|
|
|
@@ -40,8 +40,8 @@ class MixedPrecisionSearchManager:
|
|
|
40
40
|
fw_info: FrameworkInfo,
|
|
41
41
|
fw_impl: FrameworkImplementation,
|
|
42
42
|
sensitivity_evaluator: SensitivityEvaluation,
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
ru_functions: Dict[RUTarget, Tuple[MpRuMetric, MpRuAggregation]],
|
|
44
|
+
target_resource_utilization: ResourceUtilization,
|
|
45
45
|
original_graph: Graph = None):
|
|
46
46
|
"""
|
|
47
47
|
|
|
@@ -51,10 +51,10 @@ class MixedPrecisionSearchManager:
|
|
|
51
51
|
fw_impl: FrameworkImplementation object with specific framework methods implementation.
|
|
52
52
|
sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
|
|
53
53
|
a bit-width configuration for the MP model.
|
|
54
|
-
|
|
55
|
-
a couple of
|
|
56
|
-
|
|
57
|
-
original_graph: In case we have a search over a virtual graph (if we have BOPS
|
|
54
|
+
ru_functions: A dictionary with pairs of (MpRuMethod, MpRuAggregationMethod) mapping a RUTarget to
|
|
55
|
+
a couple of resource utilization metric function and resource utilization aggregation function.
|
|
56
|
+
target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
|
|
57
|
+
original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument
|
|
58
58
|
will contain the original graph (for config reconstruction purposes).
|
|
59
59
|
"""
|
|
60
60
|
|
|
@@ -66,12 +66,12 @@ class MixedPrecisionSearchManager:
|
|
|
66
66
|
self.layer_to_bitwidth_mapping = self.get_search_space()
|
|
67
67
|
self.compute_metric_fn = self.get_sensitivity_metric()
|
|
68
68
|
|
|
69
|
-
self.
|
|
70
|
-
self.
|
|
71
|
-
self.
|
|
72
|
-
self.
|
|
73
|
-
self.
|
|
74
|
-
self.
|
|
69
|
+
self.compute_ru_functions = ru_functions
|
|
70
|
+
self.target_resource_utilization = target_resource_utilization
|
|
71
|
+
self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
|
|
72
|
+
self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
|
|
73
|
+
self.min_ru = self.compute_min_ru()
|
|
74
|
+
self.non_conf_ru_dict = self._non_configurable_nodes_ru()
|
|
75
75
|
|
|
76
76
|
self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
|
|
77
77
|
original_graph=self.original_graph)
|
|
@@ -106,112 +106,114 @@ class MixedPrecisionSearchManager:
|
|
|
106
106
|
|
|
107
107
|
return self.sensitivity_evaluator.compute_metric
|
|
108
108
|
|
|
109
|
-
def
|
|
109
|
+
def compute_min_ru(self) -> Dict[RUTarget, np.ndarray]:
|
|
110
110
|
"""
|
|
111
|
-
Computes a
|
|
111
|
+
Computes a resource utilization vector with the values matching to the minimal mp configuration
|
|
112
112
|
(i.e., each node is configured with the quantization candidate that would give the minimal size of the
|
|
113
|
-
node's
|
|
114
|
-
The method computes the minimal
|
|
113
|
+
node's resource utilization).
|
|
114
|
+
The method computes the minimal resource utilization vector for each target resource utilization.
|
|
115
115
|
|
|
116
|
-
Returns: A dictionary mapping each
|
|
116
|
+
Returns: A dictionary mapping each target resource utilization to its respective minimal
|
|
117
|
+
resource utilization values.
|
|
117
118
|
|
|
118
119
|
"""
|
|
119
|
-
|
|
120
|
-
for
|
|
121
|
-
#
|
|
122
|
-
# the first one)
|
|
123
|
-
|
|
120
|
+
min_ru = {}
|
|
121
|
+
for ru_target, ru_fns in self.compute_ru_functions.items():
|
|
122
|
+
# ru_fns is a pair of resource utilization computation method and
|
|
123
|
+
# resource utilization aggregation method (in this method we only need the first one)
|
|
124
|
+
min_ru[ru_target] = ru_fns[0](self.min_ru_config, self.graph, self.fw_info, self.fw_impl)
|
|
124
125
|
|
|
125
|
-
return
|
|
126
|
+
return min_ru
|
|
126
127
|
|
|
127
|
-
def
|
|
128
|
+
def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
|
|
128
129
|
"""
|
|
129
|
-
Computes and builds a
|
|
130
|
+
Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
|
|
130
131
|
The matrix is constructed as follows (for a given target):
|
|
131
|
-
- Each row represents the set of
|
|
132
|
-
length of the output of the respective target
|
|
133
|
-
- Each entry in a specific column represents the
|
|
134
|
-
with specific candidate, all other layer are at the minimal
|
|
135
|
-
respective row.
|
|
132
|
+
- Each row represents the set of resource utilization values for a specific resource utilization
|
|
133
|
+
measure (number of rows should be equal to the length of the output of the respective target compute_ru function).
|
|
134
|
+
- Each entry in a specific column represents the resource utilization value of a given configuration
|
|
135
|
+
(single layer is configured with specific candidate, all other layer are at the minimal resource
|
|
136
|
+
utilization configuration) for the resource utilization measure of the respective row.
|
|
136
137
|
|
|
137
138
|
Args:
|
|
138
|
-
target: The target for which the
|
|
139
|
+
target: The resource target for which the resource utilization is calculated (a RUTarget value).
|
|
139
140
|
|
|
140
|
-
Returns: A
|
|
141
|
+
Returns: A resource utilization matrix.
|
|
141
142
|
|
|
142
143
|
"""
|
|
143
|
-
assert isinstance(target,
|
|
144
|
+
assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
|
|
144
145
|
|
|
145
146
|
configurable_sorted_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
|
|
146
147
|
|
|
147
|
-
|
|
148
|
+
ru_matrix = []
|
|
148
149
|
for c, c_n in enumerate(configurable_sorted_nodes):
|
|
149
150
|
for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
|
|
150
|
-
if candidate_idx == self.
|
|
151
|
-
# skip
|
|
151
|
+
if candidate_idx == self.min_ru_config[c]:
|
|
152
|
+
# skip ru computation for min configuration. Since we compute the difference from min_ru it'll
|
|
152
153
|
# always be 0 for all entries in the results vector.
|
|
153
|
-
|
|
154
|
+
candidate_rus = np.zeros(shape=self.min_ru[target].shape)
|
|
154
155
|
else:
|
|
155
|
-
|
|
156
|
-
|
|
156
|
+
candidate_rus = self.compute_candidate_relative_ru(c, candidate_idx, target)
|
|
157
|
+
ru_matrix.append(np.asarray(candidate_rus))
|
|
157
158
|
|
|
158
|
-
# We need to transpose the calculated
|
|
159
|
+
# We need to transpose the calculated ru matrix to allow later multiplication with
|
|
159
160
|
# the indicators' diagonal matrix.
|
|
160
161
|
# We only move the first axis (num of configurations) to be last,
|
|
161
162
|
# the remaining axes include the metric specific nodes (rows dimension of the new tensor)
|
|
162
|
-
# and the
|
|
163
|
-
|
|
164
|
-
return np.moveaxis(
|
|
165
|
-
|
|
166
|
-
def
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
"""
|
|
171
|
-
Computes a
|
|
172
|
-
which is obtained by computing the given target's
|
|
163
|
+
# and the ru metric values (if they are non-scalars)
|
|
164
|
+
np_ru_matrix = np.array(ru_matrix)
|
|
165
|
+
return np.moveaxis(np_ru_matrix, source=0, destination=len(np_ru_matrix.shape) - 1)
|
|
166
|
+
|
|
167
|
+
def compute_candidate_relative_ru(self,
|
|
168
|
+
conf_node_idx: int,
|
|
169
|
+
candidate_idx: int,
|
|
170
|
+
target: RUTarget) -> np.ndarray:
|
|
171
|
+
"""
|
|
172
|
+
Computes a resource utilization vector for a given candidates of a given configurable node,
|
|
173
|
+
i.e., the matching resource utilization vector which is obtained by computing the given target's
|
|
174
|
+
resource utilization function on a minimal configuration in which the given
|
|
173
175
|
layer's candidates is changed to the new given one.
|
|
174
|
-
The result is normalized by subtracting the target's minimal
|
|
176
|
+
The result is normalized by subtracting the target's minimal resource utilization vector.
|
|
175
177
|
|
|
176
178
|
Args:
|
|
177
179
|
conf_node_idx: The index of a node in a sorted configurable nodes list.
|
|
178
180
|
candidate_idx: The index of a node's quantization configuration candidate.
|
|
179
|
-
target: The target for which the
|
|
181
|
+
target: The target for which the resource utilization is calculated (a RUTarget value).
|
|
180
182
|
|
|
181
|
-
Returns: Normalized node's
|
|
183
|
+
Returns: Normalized node's resource utilization vector
|
|
182
184
|
|
|
183
185
|
"""
|
|
184
|
-
return self.
|
|
185
|
-
self.
|
|
186
|
+
return self.compute_node_ru_for_candidate(conf_node_idx, candidate_idx, target) - \
|
|
187
|
+
self.get_min_target_resource_utilization(target)
|
|
186
188
|
|
|
187
|
-
def
|
|
189
|
+
def get_min_target_resource_utilization(self, target: RUTarget) -> np.ndarray:
|
|
188
190
|
"""
|
|
189
|
-
Returns the minimal
|
|
191
|
+
Returns the minimal resource utilization vector (pre-calculated on initialization) of a specific target.
|
|
190
192
|
|
|
191
193
|
Args:
|
|
192
|
-
target: The target for which the
|
|
194
|
+
target: The target for which the resource utilization is calculated (a RUTarget value).
|
|
193
195
|
|
|
194
|
-
Returns: Minimal
|
|
196
|
+
Returns: Minimal resource utilization vector.
|
|
195
197
|
|
|
196
198
|
"""
|
|
197
|
-
return self.
|
|
199
|
+
return self.min_ru[target]
|
|
198
200
|
|
|
199
|
-
def
|
|
201
|
+
def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
|
|
200
202
|
"""
|
|
201
|
-
Computes a
|
|
203
|
+
Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal
|
|
202
204
|
target configuration with the given candidate index.
|
|
203
205
|
|
|
204
206
|
Args:
|
|
205
207
|
conf_node_idx: The index of a node in a sorted configurable nodes list.
|
|
206
|
-
candidate_idx: Quantization config candidate to be used for the node's
|
|
207
|
-
target: The target for which the
|
|
208
|
+
candidate_idx: Quantization config candidate to be used for the node's resource utilization computation.
|
|
209
|
+
target: The target for which the resource utilization is calculated (a RUTarget value).
|
|
208
210
|
|
|
209
|
-
Returns: Node's
|
|
211
|
+
Returns: Node's resource utilization vector.
|
|
210
212
|
|
|
211
213
|
"""
|
|
212
|
-
return self.
|
|
214
|
+
return self.compute_ru_functions[target][0](
|
|
213
215
|
self.replace_config_in_index(
|
|
214
|
-
self.
|
|
216
|
+
self.min_ru_config,
|
|
215
217
|
conf_node_idx,
|
|
216
218
|
candidate_idx),
|
|
217
219
|
self.graph,
|
|
@@ -236,58 +238,60 @@ class MixedPrecisionSearchManager:
|
|
|
236
238
|
updated_cfg[idx] = value
|
|
237
239
|
return updated_cfg
|
|
238
240
|
|
|
239
|
-
def
|
|
241
|
+
def _non_configurable_nodes_ru(self) -> Dict[RUTarget, np.ndarray]:
|
|
240
242
|
"""
|
|
241
|
-
Computes a
|
|
243
|
+
Computes a resource utilization vector of all non-configurable nodes in the given graph for each of the
|
|
244
|
+
resource utilization targets.
|
|
242
245
|
|
|
243
|
-
Returns: A mapping between a
|
|
246
|
+
Returns: A mapping between a RUTarget and its non-configurable nodes' resource utilization vector.
|
|
244
247
|
"""
|
|
245
248
|
|
|
246
|
-
|
|
247
|
-
for target,
|
|
248
|
-
# Call for the
|
|
249
|
+
non_conf_ru_dict = {}
|
|
250
|
+
for target, ru_value in self.target_resource_utilization.get_resource_utilization_dict().items():
|
|
251
|
+
# Call for the ru method of the given target - empty quantization configuration list is passed since we
|
|
249
252
|
# compute for non-configurable nodes
|
|
250
|
-
if target ==
|
|
251
|
-
|
|
253
|
+
if target == RUTarget.BOPS:
|
|
254
|
+
ru_vector = None
|
|
252
255
|
else:
|
|
253
|
-
|
|
256
|
+
ru_vector = self.compute_ru_functions[target][0]([], self.graph, self.fw_info, self.fw_impl)
|
|
254
257
|
|
|
255
|
-
|
|
258
|
+
non_conf_ru_dict[target] = ru_vector
|
|
256
259
|
|
|
257
|
-
return
|
|
260
|
+
return non_conf_ru_dict
|
|
258
261
|
|
|
259
|
-
def
|
|
262
|
+
def compute_resource_utilization_for_config(self, config: List[int]) -> ResourceUtilization:
|
|
260
263
|
"""
|
|
261
|
-
Computes the
|
|
264
|
+
Computes the resource utilization values for a given mixed-precision configuration.
|
|
262
265
|
|
|
263
266
|
Args:
|
|
264
267
|
config: A mixed-precision configuration (list of candidates indices)
|
|
265
268
|
|
|
266
|
-
Returns: A
|
|
269
|
+
Returns: A ResourceUtilization object with the model's resource utilization values when quantized
|
|
270
|
+
with the given config.
|
|
267
271
|
|
|
268
272
|
"""
|
|
269
273
|
|
|
270
|
-
|
|
274
|
+
ru_dict = {}
|
|
271
275
|
|
|
272
|
-
for
|
|
273
|
-
# Passing False to
|
|
276
|
+
for ru_target, ru_fns in self.compute_ru_functions.items():
|
|
277
|
+
# Passing False to ru methods and aggregations to indicates that the computations
|
|
274
278
|
# are not for constraints setting
|
|
275
|
-
if
|
|
276
|
-
|
|
279
|
+
if ru_target == RUTarget.BOPS:
|
|
280
|
+
configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl, False)
|
|
277
281
|
else:
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
if
|
|
281
|
-
|
|
282
|
+
configurable_nodes_ru_vector = ru_fns[0](config, self.original_graph, self.fw_info, self.fw_impl)
|
|
283
|
+
non_configurable_nodes_ru_vector = self.non_conf_ru_dict.get(ru_target)
|
|
284
|
+
if non_configurable_nodes_ru_vector is None or len(non_configurable_nodes_ru_vector) == 0:
|
|
285
|
+
ru_ru = self.compute_ru_functions[ru_target][1](configurable_nodes_ru_vector, False)
|
|
282
286
|
else:
|
|
283
|
-
|
|
284
|
-
np.concatenate([
|
|
287
|
+
ru_ru = self.compute_ru_functions[ru_target][1](
|
|
288
|
+
np.concatenate([configurable_nodes_ru_vector, non_configurable_nodes_ru_vector]), False)
|
|
285
289
|
|
|
286
|
-
|
|
290
|
+
ru_dict[ru_target] = ru_ru[0]
|
|
287
291
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
return
|
|
292
|
+
config_ru = ResourceUtilization()
|
|
293
|
+
config_ru.set_resource_utilization_by_target(ru_dict)
|
|
294
|
+
return config_ru
|
|
291
295
|
|
|
292
296
|
def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
|
|
293
297
|
"""
|
|
@@ -317,7 +321,7 @@ class MixedPrecisionSearchManager:
|
|
|
317
321
|
class ConfigReconstructionHelper:
|
|
318
322
|
"""
|
|
319
323
|
A class to help reconstruct an original mixed-precision configuration from a virtual one,
|
|
320
|
-
when running mixed-precision search with BOPS
|
|
324
|
+
when running mixed-precision search with BOPS utilization.
|
|
321
325
|
It provides a reconstruct_config_from_virtual_graph which allows to translate a bit-width config of a virtual graph
|
|
322
326
|
to a config of the original configurable nodes.
|
|
323
327
|
"""
|
|
@@ -375,8 +379,7 @@ class ConfigReconstructionHelper:
|
|
|
375
379
|
|
|
376
380
|
if changed_virtual_nodes_idx is not None:
|
|
377
381
|
if original_base_config is None:
|
|
378
|
-
Logger.critical("
|
|
379
|
-
"set of nodes.") # pragma: no cover
|
|
382
|
+
Logger.critical("To run config reconstruction for a partial set of nodes, a base original config must be provided.") # pragma: no cover
|
|
380
383
|
|
|
381
384
|
updated_virtual_nodes = \
|
|
382
385
|
[(idx, self.virtual_graph.get_configurable_sorted_nodes(self.fw_info)[idx]) for idx in changed_virtual_nodes_idx]
|
|
@@ -418,9 +421,7 @@ class ConfigReconstructionHelper:
|
|
|
418
421
|
if isinstance(weights_node, VirtualSplitWeightsNode):
|
|
419
422
|
self.get_activation_for_split_weights(weights_node, n, virtual_cfg_idx, virtual_mp_cfg)
|
|
420
423
|
else:
|
|
421
|
-
Logger.
|
|
422
|
-
f"in order to construct the virtual graph, but node {n.name} is not of type "
|
|
423
|
-
f"VirtualSplitWeightsNode") # pragma: no cover
|
|
424
|
+
Logger.critical(f"Virtual graph construction error: Expected all weights nodes to be split into weights and activation nodes. Found node '{n.name}' not split as expected. Every weights node should correspond to a VirtualSplitWeightsNode type.") # pragma: no cover
|
|
424
425
|
|
|
425
426
|
activation_node = n.original_activation_node
|
|
426
427
|
if isinstance(activation_node, VirtualSplitActivationNode):
|
|
@@ -441,15 +442,13 @@ class ConfigReconstructionHelper:
|
|
|
441
442
|
# It's ok, need to find the node's configuration
|
|
442
443
|
self.get_activation_for_split_weights(n, n, virtual_cfg_idx, virtual_mp_cfg)
|
|
443
444
|
else:
|
|
444
|
-
Logger.
|
|
445
|
-
f"but its predecessor doesn't have multiple outputs.") # pragma: no cover
|
|
445
|
+
Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{n.name}' to have multiple outputs when not composed with an activation node.") # pragma: no cover
|
|
446
446
|
elif isinstance(n, VirtualSplitActivationNode):
|
|
447
447
|
self.get_weights_for_split_activation(n, n, virtual_cfg_idx, virtual_mp_cfg)
|
|
448
448
|
else:
|
|
449
449
|
# Node didn't change in virtual graph - candidates list is similar to original
|
|
450
450
|
if n.name not in self.origin_sorted_conf_nodes_names:
|
|
451
|
-
Logger.
|
|
452
|
-
f"but is not configurable in the original graph.") # pragma: no cover
|
|
451
|
+
Logger.critical(f"Configuration mismatch: Node '{n.name}' is configurable in the virtual graph but not in the original graph. Verify node configurations.") # pragma: no cover
|
|
453
452
|
origin_idx = self.origin_sorted_conf_nodes_names.index(n.name)
|
|
454
453
|
self.origin_node_idx_to_cfg[origin_idx] = virtual_cfg_idx
|
|
455
454
|
|
|
@@ -654,8 +653,7 @@ class ConfigReconstructionHelper:
|
|
|
654
653
|
# It's ok, need to find the node's configuration
|
|
655
654
|
self.retrieve_weights_activation_config(activation_node, weights_node, virtual_node, virtual_cfg_idx, virtual_mp_cfg)
|
|
656
655
|
else:
|
|
657
|
-
Logger.
|
|
658
|
-
f"but its predecessor doesn't have multiple outputs.") # pragma: no cover
|
|
656
|
+
Logger.critical(f"Virtual graph configuration error: Expected the predecessor of node '{n.name}' to have multiple outputs when not composed with an activation node.") # pragma: no cover
|
|
659
657
|
|
|
660
658
|
def update_config_at_original_idx(self, n: BaseNode, origin_cfg_idx: int):
|
|
661
659
|
"""
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
from enum import Enum
|
|
16
|
+
from typing import Dict, Any
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class RUTarget(Enum):
|
|
22
|
+
"""
|
|
23
|
+
Targets for which we define Resource Utilization metrics for mixed-precision search.
|
|
24
|
+
For each target that we care to consider in a mixed-precision search, there should be defined a set of
|
|
25
|
+
resource utilization computation function, resource utilization aggregation function,
|
|
26
|
+
and resource utilization target (within a ResourceUtilization object).
|
|
27
|
+
|
|
28
|
+
Whenever adding a resource utilization metric to ResourceUtilization class we should add a matching target to this enum.
|
|
29
|
+
|
|
30
|
+
WEIGHTS - Weights memory ResourceUtilization metric.
|
|
31
|
+
|
|
32
|
+
ACTIVATION - Activation memory ResourceUtilization metric.
|
|
33
|
+
|
|
34
|
+
TOTAL - Total memory ResourceUtilization metric.
|
|
35
|
+
|
|
36
|
+
BOPS - Total Bit-Operations ResourceUtilization Metric.
|
|
37
|
+
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
WEIGHTS = 'weights'
|
|
41
|
+
ACTIVATION = 'activation'
|
|
42
|
+
TOTAL = 'total'
|
|
43
|
+
BOPS = 'bops'
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class ResourceUtilization:
|
|
47
|
+
"""
|
|
48
|
+
Class to represent measurements of performance.
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
def __init__(self,
|
|
52
|
+
weights_memory: float = np.inf,
|
|
53
|
+
activation_memory: float = np.inf,
|
|
54
|
+
total_memory: float = np.inf,
|
|
55
|
+
bops: float = np.inf):
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not).
|
|
60
|
+
activation_memory: Memory of a model's activation in bytes, according to the given activation resource utilization metric.
|
|
61
|
+
total_memory: The sum of model's activation and weights memory in bytes, according to the given total resource utilization metric.
|
|
62
|
+
bops: The total bit-operations in the model.
|
|
63
|
+
"""
|
|
64
|
+
self.weights_memory = weights_memory
|
|
65
|
+
self.activation_memory = activation_memory
|
|
66
|
+
self.total_memory = total_memory
|
|
67
|
+
self.bops = bops
|
|
68
|
+
|
|
69
|
+
def __repr__(self):
|
|
70
|
+
return f"Weights_memory: {self.weights_memory}, " \
|
|
71
|
+
f"Activation_memory: {self.activation_memory}, " \
|
|
72
|
+
f"Total_memory: {self.total_memory}, " \
|
|
73
|
+
f"BOPS: {self.bops}"
|
|
74
|
+
|
|
75
|
+
def get_resource_utilization_dict(self) -> Dict[RUTarget, float]:
|
|
76
|
+
"""
|
|
77
|
+
Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target.
|
|
78
|
+
"""
|
|
79
|
+
return {RUTarget.WEIGHTS: self.weights_memory,
|
|
80
|
+
RUTarget.ACTIVATION: self.activation_memory,
|
|
81
|
+
RUTarget.TOTAL: self.total_memory,
|
|
82
|
+
RUTarget.BOPS: self.bops}
|
|
83
|
+
|
|
84
|
+
def set_resource_utilization_by_target(self, ru_mapping: Dict[RUTarget, float]):
|
|
85
|
+
"""
|
|
86
|
+
Setting a ResourceUtilization object values for each ResourceUtilization target in the given dictionary.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
ru_mapping: A mapping from a RUTarget to a matching resource utilization value.
|
|
90
|
+
|
|
91
|
+
"""
|
|
92
|
+
self.weights_memory = ru_mapping.get(RUTarget.WEIGHTS, np.inf)
|
|
93
|
+
self.activation_memory = ru_mapping.get(RUTarget.ACTIVATION, np.inf)
|
|
94
|
+
self.total_memory = ru_mapping.get(RUTarget.TOTAL, np.inf)
|
|
95
|
+
self.bops = ru_mapping.get(RUTarget.BOPS, np.inf)
|
|
96
|
+
|
|
97
|
+
def holds_constraints(self, ru: Any) -> bool:
|
|
98
|
+
"""
|
|
99
|
+
Checks whether the given ResourceUtilization object holds a set of ResourceUtilization constraints defined by
|
|
100
|
+
the current ResourceUtilization object.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
ru: A ResourceUtilization object to check if it holds the constraints.
|
|
104
|
+
|
|
105
|
+
Returns: True if all the given resource utilization values are not greater than the referenced resource utilization values.
|
|
106
|
+
|
|
107
|
+
"""
|
|
108
|
+
if not isinstance(ru, ResourceUtilization):
|
|
109
|
+
return False
|
|
110
|
+
|
|
111
|
+
return ru.weights_memory <= self.weights_memory and \
|
|
112
|
+
ru.activation_memory <= self.activation_memory and \
|
|
113
|
+
ru.total_memory <= self.total_memory and \
|
|
114
|
+
ru.bops <= self.bops
|
|
@@ -12,26 +12,26 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ==============================================================================
|
|
15
|
-
from typing import Callable, Any
|
|
16
15
|
import numpy as np
|
|
16
|
+
from typing import Callable, Any
|
|
17
17
|
|
|
18
|
-
from model_compression_toolkit.core import FrameworkInfo, KPI, CoreConfig
|
|
19
|
-
from model_compression_toolkit.core.common import Graph
|
|
20
18
|
from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
|
19
|
+
from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig
|
|
20
|
+
from model_compression_toolkit.core.common import Graph
|
|
21
21
|
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
|
22
22
|
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
|
|
23
23
|
from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
|
|
24
24
|
from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
def compute_resource_utilization_data(in_model: Any,
|
|
28
|
+
representative_data_gen: Callable,
|
|
29
|
+
core_config: CoreConfig,
|
|
30
|
+
tpc: TargetPlatformCapabilities,
|
|
31
|
+
fw_info: FrameworkInfo,
|
|
32
|
+
fw_impl: FrameworkImplementation) -> ResourceUtilization:
|
|
33
33
|
"""
|
|
34
|
-
Compute
|
|
34
|
+
Compute Resource Utilization information that can be relevant for defining target ResourceUtilization for mixed precision search.
|
|
35
35
|
Calculates maximal activation tensor, sum of weights' parameters and total (sum of both).
|
|
36
36
|
|
|
37
37
|
Args:
|
|
@@ -43,12 +43,12 @@ def compute_kpi_data(in_model: Any,
|
|
|
43
43
|
fw_info: Information needed for quantization about the specific framework.
|
|
44
44
|
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
|
45
45
|
|
|
46
|
-
Returns: A
|
|
46
|
+
Returns: A ResourceUtilization object with the results.
|
|
47
47
|
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
|
-
# We assume that the
|
|
51
|
-
# so we run graph preparation under the assumption of enabled mixed precision.
|
|
50
|
+
# We assume that the resource_utilization_data API is used to compute the model resource utilization for
|
|
51
|
+
# mixed precision scenario, so we run graph preparation under the assumption of enabled mixed precision.
|
|
52
52
|
transformed_graph = graph_preparation_runner(in_model,
|
|
53
53
|
representative_data_gen,
|
|
54
54
|
core_config.quantization_config,
|
|
@@ -65,17 +65,17 @@ def compute_kpi_data(in_model: Any,
|
|
|
65
65
|
activation_output_sizes = compute_activation_output_sizes(graph=transformed_graph)
|
|
66
66
|
max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
|
|
67
67
|
|
|
68
|
-
# Compute total
|
|
68
|
+
# Compute total memory utilization - parameters sum + max activation tensor
|
|
69
69
|
total_size = total_weights_params + max_activation_tensor_size
|
|
70
70
|
|
|
71
|
-
# Compute BOPS
|
|
71
|
+
# Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
|
|
72
72
|
bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
|
|
73
73
|
bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
|
|
74
74
|
|
|
75
|
-
return
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
return ResourceUtilization(weights_memory=total_weights_params,
|
|
76
|
+
activation_memory=max_activation_tensor_size,
|
|
77
|
+
total_memory=total_size,
|
|
78
|
+
bops=bops_count)
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> np.ndarray:
|