mct-nightly 1.11.0.20240320.400__py3-none-any.whl → 1.11.0.20240322.404__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/METADATA +17 -9
- {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/RECORD +152 -152
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +1 -1
- model_compression_toolkit/core/__init__.py +3 -3
- model_compression_toolkit/core/common/collectors/base_collector.py +2 -2
- model_compression_toolkit/core/common/data_loader.py +3 -3
- model_compression_toolkit/core/common/graph/base_graph.py +10 -13
- model_compression_toolkit/core/common/graph/base_node.py +3 -3
- model_compression_toolkit/core/common/graph/edge.py +2 -1
- model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py +2 -4
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +2 -2
- model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
- model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +1 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +24 -23
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +110 -112
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +114 -0
- model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_data.py → resource_utilization_tools/resource_utilization_data.py} +19 -19
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +105 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +26 -0
- model_compression_toolkit/core/common/mixed_precision/{kpi_tools/kpi_methods.py → resource_utilization_tools/ru_methods.py} +61 -61
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +75 -71
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +2 -4
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +34 -34
- model_compression_toolkit/core/common/model_collector.py +2 -2
- model_compression_toolkit/core/common/network_editors/actions.py +3 -3
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +12 -12
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +2 -2
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +2 -2
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +2 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +7 -7
- model_compression_toolkit/core/common/pruning/prune_graph.py +2 -3
- model_compression_toolkit/core/common/pruning/pruner.py +7 -7
- model_compression_toolkit/core/common/pruning/pruning_config.py +1 -1
- model_compression_toolkit/core/common/pruning/pruning_info.py +2 -2
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +7 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +3 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +4 -2
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +4 -6
- model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +2 -4
- model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -1
- model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +8 -6
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +4 -6
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +4 -7
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +3 -3
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +1 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +3 -3
- model_compression_toolkit/core/common/user_info.py +1 -1
- model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/keras/back2framework/instance_builder.py +2 -2
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +4 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +3 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +2 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +1 -1
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +3 -3
- model_compression_toolkit/core/keras/hessian/trace_hessian_calculator_keras.py +1 -2
- model_compression_toolkit/core/keras/hessian/weights_trace_hessian_calculator_keras.py +5 -6
- model_compression_toolkit/core/keras/keras_implementation.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +2 -4
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +1 -1
- model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +7 -7
- model_compression_toolkit/core/keras/reader/common.py +2 -2
- model_compression_toolkit/core/keras/reader/node_builder.py +1 -1
- model_compression_toolkit/core/keras/{kpi_data_facade.py → resource_utilization_data_facade.py} +25 -24
- model_compression_toolkit/core/keras/tf_tensor_numpy.py +4 -2
- model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +6 -11
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +1 -1
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +3 -7
- model_compression_toolkit/core/pytorch/hessian/trace_hessian_calculator_pytorch.py +1 -2
- model_compression_toolkit/core/pytorch/hessian/weights_trace_hessian_calculator_pytorch.py +2 -2
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +3 -3
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +1 -1
- model_compression_toolkit/core/pytorch/reader/graph_builders.py +5 -7
- model_compression_toolkit/core/pytorch/reader/reader.py +2 -2
- model_compression_toolkit/core/pytorch/{kpi_data_facade.py → resource_utilization_data_facade.py} +24 -22
- model_compression_toolkit/core/pytorch/utils.py +3 -2
- model_compression_toolkit/core/runner.py +43 -42
- model_compression_toolkit/data_generation/common/data_generation.py +18 -18
- model_compression_toolkit/data_generation/common/model_info_exctractors.py +1 -1
- model_compression_toolkit/data_generation/keras/keras_data_generation.py +7 -10
- model_compression_toolkit/data_generation/keras/model_info_exctractors.py +2 -1
- model_compression_toolkit/data_generation/keras/optimization_functions/image_initilization.py +2 -1
- model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +2 -4
- model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +2 -1
- model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +8 -11
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +1 -1
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +2 -3
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +2 -3
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +8 -4
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +2 -2
- model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +7 -8
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +19 -12
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +2 -2
- model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +10 -11
- model_compression_toolkit/gptq/common/gptq_graph.py +3 -3
- model_compression_toolkit/gptq/common/gptq_training.py +14 -12
- model_compression_toolkit/gptq/keras/gptq_training.py +10 -8
- model_compression_toolkit/gptq/keras/graph_info.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +15 -17
- model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +4 -5
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/gptq/pytorch/gptq_training.py +10 -8
- model_compression_toolkit/gptq/pytorch/graph_info.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +11 -13
- model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +3 -4
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/logger.py +1 -13
- model_compression_toolkit/pruning/keras/pruning_facade.py +11 -12
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +11 -12
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -14
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +7 -8
- model_compression_toolkit/qat/keras/quantization_facade.py +20 -22
- model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +2 -3
- model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +12 -14
- model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +2 -3
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +1 -1
- model_compression_toolkit/target_platform_capabilities/immutable.py +4 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +4 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +1 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +43 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +13 -18
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py +2 -2
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py +2 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +5 -5
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +13 -13
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +14 -7
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +5 -5
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -3
- model_compression_toolkit/trainable_infrastructure/keras/load_model.py +4 -5
- model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +3 -4
- model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi.py +0 -112
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/kpi_tools/kpi_functions_mapping.py +0 -26
- {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/LICENSE.md +0 -0
- {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/WHEEL +0 -0
- {mct_nightly-1.11.0.20240320.400.dist-info → mct_nightly-1.11.0.20240322.404.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/common/mixed_precision/{kpi_tools → resource_utilization_tools}/__init__.py +0 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
import copy
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from functools import partial
|
|
18
|
+
from typing import List, Any
|
|
19
|
+
import numpy as np
|
|
20
|
+
|
|
21
|
+
from pulp import lpSum
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def sum_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[Any]:
    """
    Collapses a resource utilization vector into a single measure by summing all of its values.

    Args:
        ru_vector: A vector with nodes' resource utilization values.
        set_constraints: When True (default), the sum is built with lpSum so that it can be used in the
            LP problem definition. When False, the values are summed directly, for computing the
            resource utilization of a given configuration.

    Returns: A single-element list holding the aggregated sum: an lpSum expression when set_constraints
        is True, otherwise the plain sum of the values (0 for an empty vector).

    """
    if set_constraints:
        # LP formalization: keep the sum symbolic.
        return [lpSum(ru_vector)]

    if len(ru_vector) == 0:
        return [0]

    return [sum(ru_vector)]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def max_ru_values(ru_vector: np.ndarray, set_constraints: bool = True) -> List[float]:
    """
    Aggregates a resource utilization vector so that a max constraint can be expressed in the
    linear programming problem formalization. A max constraint is defined by bounding every value
    of the vector separately by the target resource utilization, so in that mode the values are
    returned as they are.

    Args:
        ru_vector: A vector with nodes' resource utilization values.
        set_constraints: When True (default), the vector's values are returned one by one, to be used
            as separate constraints in the LP problem. When False, the actual maximal value is computed,
            for computing the resource utilization of a given configuration.

    Returns: A list with all of the vector's values when set_constraints is True, otherwise a
        single-element list with the maximal value (0 for an empty vector).

    """
    if set_constraints:
        # One entry per value - each one becomes its own constraint.
        return list(ru_vector)

    if len(ru_vector) == 0:
        return [0]

    return [max(ru_vector)]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def total_ru(ru_tensor: np.ndarray, set_constraints: bool = True) -> List[float]:
    """
    Aggregates a resource utilization tensor to allow a weights and activation total utilization
    constraint in the linear programming problem formalization. In order to do so, a separate
    constraint is defined on each activation memory utilization value in the tensor, combined with
    the sum of the weights memory utilization.
    Note that each entry of the given ru_tensor should contain a pair of values: the weights
    utilization first and the activation utilization second.

    Args:
        ru_tensor: A tensor with nodes' resource utilization values for weights and activation.
        set_constraints: When True (default), the result is built with lpSum, to be used as constraints
            in the LP problem. When False, the actual total utilization value is computed, for
            computing the resource utilization of a given configuration.

    Returns: When set_constraints is True, a list with one expression per entry (sum of all weights
        utilization plus that entry's activation utilization). Otherwise, a single-element list with
        the sum of all weights utilization plus the maximal activation utilization (0 for an empty tensor).

    """
    if not set_constraints:
        if len(ru_tensor) == 0:
            # Nothing to aggregate. Return 0 like the other aggregation methods do for empty input,
            # instead of failing on max() of an empty sequence.
            return [0]
        weights_ru = sum(ru[0] for ru in ru_tensor)
        activation_ru = max(ru[1] for ru in ru_tensor)
        return [weights_ru + activation_ru]

    # LP formalization: the weights sum is shared by all constraints, each activation value gets its own.
    weights_ru = lpSum([ru[0] for ru in ru_tensor])
    return [weights_ru + activation_ru for _, activation_ru in ru_tensor]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class MpRuAggregation(Enum):
    """
    Defines resource utilization aggregation functions that can be used to compute final resource utilization metric.
    The enum values can be used to call a function on a set of arguments.

     SUM - applies the sum_ru_values function

     MAX - applies the max_ru_values function

     TOTAL - applies the total_ru function

    """
    # The functions are wrapped with partial so that they are stored as enum member values
    # (a plain function assigned in an Enum body is treated as a method and not as a member).
    SUM = partial(sum_ru_values)
    MAX = partial(max_ru_values)
    TOTAL = partial(total_ru)

    def __call__(self, *args, **kwargs):
        """
        Calls the aggregation function held by this member.

        Args:
            *args: Positional arguments to pass to the aggregation function.
            **kwargs: Keyword arguments to pass to the aggregation function (e.g., set_constraints).

        Returns: The result of the aggregation function.

        """
        return self.value(*args, **kwargs)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
|
|
16
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_aggregation_methods import MpRuAggregation
|
|
17
|
+
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.ru_methods import MpRuMetric
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Maps each RUTarget that is considered in the mp search to a pair of
# (resource utilization computation function, resource utilization aggregation function).
# When adding a RUTarget that we want to consider in our mp search,
# a matching pair should be added to this dictionary.
ru_functions_mapping = {
    RUTarget.WEIGHTS: (MpRuMetric.WEIGHTS_SIZE, MpRuAggregation.SUM),
    RUTarget.ACTIVATION: (MpRuMetric.ACTIVATION_OUTPUT_SIZE, MpRuAggregation.MAX),
    RUTarget.TOTAL: (MpRuMetric.TOTAL_WEIGHTS_ACTIVATION_SIZE, MpRuAggregation.TOTAL),
    RUTarget.BOPS: (MpRuMetric.BOPS_COUNT, MpRuAggregation.SUM),
}
|
|
@@ -28,14 +28,14 @@ from model_compression_toolkit.core.common.graph.virtual_activation_weights_node
|
|
|
28
28
|
from model_compression_toolkit.logger import Logger
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
31
|
+
def weights_size_utilization(mp_cfg: List[int],
|
|
32
|
+
graph: Graph,
|
|
33
|
+
fw_info: FrameworkInfo,
|
|
34
|
+
fw_impl: FrameworkImplementation) -> np.ndarray:
|
|
35
35
|
"""
|
|
36
|
-
Computes a
|
|
36
|
+
Computes a resource utilization vector with the respective weights' memory size for the given weight configurable node,
|
|
37
37
|
according to the given mixed-precision configuration.
|
|
38
|
-
If an empty configuration is given, then computes
|
|
38
|
+
If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
|
|
39
39
|
|
|
40
40
|
Args:
|
|
41
41
|
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
|
|
@@ -52,7 +52,7 @@ def weights_size_kpi(mp_cfg: List[int],
|
|
|
52
52
|
weights_mp_nodes = [n.name for n in graph.get_sorted_weights_configurable_nodes(fw_info)]
|
|
53
53
|
|
|
54
54
|
if len(mp_cfg) == 0:
|
|
55
|
-
# Computing non-configurable nodes
|
|
55
|
+
# Computing non-configurable nodes resource utilization
|
|
56
56
|
# TODO: when enabling multiple attribute quantization by default (currently,
|
|
57
57
|
# only kernel quantization is enabled) we should include other attributes memory in the sum of all
|
|
58
58
|
# weights memory (when quantized to their default 8-bit, non-configurable).
|
|
@@ -71,7 +71,8 @@ def weights_size_kpi(mp_cfg: List[int],
|
|
|
71
71
|
node_weights_memory_in_bytes = _compute_node_weights_memory(n, node_nbits, fw_info)
|
|
72
72
|
weights_memory.append(node_weights_memory_in_bytes)
|
|
73
73
|
else:
|
|
74
|
-
# Go over configurable all nodes that should be taken into consideration when computing the weights
|
|
74
|
+
# Go over all configurable nodes that should be taken into consideration when computing the weights
|
|
75
|
+
# resource utilization.
|
|
75
76
|
for n in graph.get_sorted_weights_configurable_nodes(fw_info):
|
|
76
77
|
# Only nodes with kernel op can be considered configurable
|
|
77
78
|
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
|
|
@@ -86,14 +87,14 @@ def weights_size_kpi(mp_cfg: List[int],
|
|
|
86
87
|
return np.array(weights_memory)
|
|
87
88
|
|
|
88
89
|
|
|
89
|
-
def
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
90
|
+
def activation_output_size_utilization(mp_cfg: List[int],
|
|
91
|
+
graph: Graph,
|
|
92
|
+
fw_info: FrameworkInfo,
|
|
93
|
+
fw_impl: FrameworkImplementation) -> np.ndarray:
|
|
93
94
|
"""
|
|
94
|
-
Computes a
|
|
95
|
+
Computes a resource utilization vector with the respective output memory size for each activation configurable node,
|
|
95
96
|
according to the given mixed-precision configuration.
|
|
96
|
-
If an empty configuration is given, then computes
|
|
97
|
+
If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
|
|
97
98
|
|
|
98
99
|
Args:
|
|
99
100
|
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
|
|
@@ -111,7 +112,7 @@ def activation_output_size_kpi(mp_cfg: List[int],
|
|
|
111
112
|
activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
|
|
112
113
|
|
|
113
114
|
if len(mp_cfg) == 0:
|
|
114
|
-
# Computing non-configurable nodes
|
|
115
|
+
# Computing non-configurable nodes resource utilization
|
|
115
116
|
for n in graph.nodes:
|
|
116
117
|
non_configurable_node = n.name not in activation_mp_nodes \
|
|
117
118
|
and n.has_activation_quantization_enabled_candidate() \
|
|
@@ -122,7 +123,7 @@ def activation_output_size_kpi(mp_cfg: List[int],
|
|
|
122
123
|
node_activation_memory_in_bytes = _compute_node_activation_memory(n, node_nbits)
|
|
123
124
|
activation_memory.append(node_activation_memory_in_bytes)
|
|
124
125
|
else:
|
|
125
|
-
# Go over all nodes that should be taken into consideration when computing the weights
|
|
126
|
+
# Go over all nodes that should be taken into consideration when computing the activation memory utilization.
|
|
126
127
|
for n in graph.get_sorted_activation_configurable_nodes():
|
|
127
128
|
node_idx = mp_nodes.index(n.name)
|
|
128
129
|
node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
|
|
@@ -135,14 +136,14 @@ def activation_output_size_kpi(mp_cfg: List[int],
|
|
|
135
136
|
return np.array(activation_memory)
|
|
136
137
|
|
|
137
138
|
|
|
138
|
-
def
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
def total_weights_activation_utilization(mp_cfg: List[int],
|
|
140
|
+
graph: Graph,
|
|
141
|
+
fw_info: FrameworkInfo,
|
|
142
|
+
fw_impl: FrameworkImplementation) -> np.ndarray:
|
|
142
143
|
"""
|
|
143
|
-
Computes
|
|
144
|
+
Computes resource utilization tensor with the respective weights size and output memory size for each activation configurable node,
|
|
144
145
|
according to the given mixed-precision configuration.
|
|
145
|
-
If an empty configuration is given, then computes
|
|
146
|
+
If an empty configuration is given, then computes resource utilization vector for non-configurable nodes.
|
|
146
147
|
|
|
147
148
|
Args:
|
|
148
149
|
mp_cfg: A mixed-precision configuration (list of candidates index for each configurable node)
|
|
@@ -160,15 +161,15 @@ def total_weights_activation_kpi(mp_cfg: List[int],
|
|
|
160
161
|
activation_mp_nodes = [n.name for n in graph.get_sorted_activation_configurable_nodes()]
|
|
161
162
|
|
|
162
163
|
if len(mp_cfg) == 0:
|
|
163
|
-
# Computing non-configurable nodes
|
|
164
|
+
# Computing non-configurable nodes utilization
|
|
164
165
|
for n in graph.nodes:
|
|
165
166
|
|
|
166
167
|
non_configurable = False
|
|
167
168
|
node_weights_memory_in_bytes, node_activation_memory_in_bytes = 0, 0
|
|
168
169
|
|
|
169
170
|
# Non-configurable Weights
|
|
170
|
-
# TODO: currently considering only kernel attributes in weights
|
|
171
|
-
# quantization we need to modify this method to count all attributes.
|
|
171
|
+
# TODO: currently considering only kernel attributes in weights memory utilization.
|
|
172
|
+
# When enabling multi-attribute quantization we need to modify this method to count all attributes.
|
|
172
173
|
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
|
|
173
174
|
if kernel_attr is not None:
|
|
174
175
|
is_non_configurable_weights = n.name not in weights_mp_nodes and \
|
|
@@ -196,9 +197,9 @@ def total_weights_activation_kpi(mp_cfg: List[int],
|
|
|
196
197
|
np.array([node_weights_memory_in_bytes, node_activation_memory_in_bytes]))
|
|
197
198
|
else:
|
|
198
199
|
# Go over all nodes that should be taken into consideration when computing the weights or
|
|
199
|
-
# activation
|
|
200
|
+
# activation memory utilization (all configurable nodes).
|
|
200
201
|
for node_idx, n in enumerate(graph.get_configurable_sorted_nodes(fw_info)):
|
|
201
|
-
# TODO: currently considering only kernel attributes in weights
|
|
202
|
+
# TODO: currently considering only kernel attributes in weights memory utilization. When enabling multi-attribute
|
|
202
203
|
# quantization we need to modify this method to count all attributes.
|
|
203
204
|
|
|
204
205
|
node_qc = n.candidates_quantization_cfg[mp_cfg[node_idx]]
|
|
@@ -222,13 +223,13 @@ def total_weights_activation_kpi(mp_cfg: List[int],
|
|
|
222
223
|
return np.array(weights_activation_memory)
|
|
223
224
|
|
|
224
225
|
|
|
225
|
-
def
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
226
|
+
def bops_utilization(mp_cfg: List[int],
|
|
227
|
+
graph: Graph,
|
|
228
|
+
fw_info: FrameworkInfo,
|
|
229
|
+
fw_impl: FrameworkImplementation,
|
|
230
|
+
set_constraints: bool = True) -> np.ndarray:
|
|
230
231
|
"""
|
|
231
|
-
Computes a
|
|
232
|
+
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
|
|
232
233
|
according to the given mixed-precision configuration of a virtual graph with composed nodes.
|
|
233
234
|
|
|
234
235
|
Args:
|
|
@@ -236,7 +237,7 @@ def bops_kpi(mp_cfg: List[int],
|
|
|
236
237
|
graph: Graph object.
|
|
237
238
|
fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
|
|
238
239
|
fw_impl: FrameworkImplementation object with specific framework methods implementation.
|
|
239
|
-
set_constraints: A flag for utilizing the method for
|
|
240
|
+
set_constraints: A flag for utilizing the method for resource utilization computation of a
|
|
240
241
|
given config not for LP formalization purposes.
|
|
241
242
|
|
|
242
243
|
Returns: A vector of node's BOPS count.
|
|
@@ -245,12 +246,12 @@ def bops_kpi(mp_cfg: List[int],
|
|
|
245
246
|
"""
|
|
246
247
|
|
|
247
248
|
if not set_constraints:
|
|
248
|
-
return
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
249
|
+
return _bops_utilization(mp_cfg,
|
|
250
|
+
graph,
|
|
251
|
+
fw_info,
|
|
252
|
+
fw_impl)
|
|
252
253
|
|
|
253
|
-
# BOPs
|
|
254
|
+
# BOPs utilization method considers non-configurable nodes, therefore, it doesn't need separate implementation
|
|
254
255
|
# for non-configurable nodes for setting a constraint (no need for separate implementation for len(mp_cfg) = 0).
|
|
255
256
|
|
|
256
257
|
virtual_bops_nodes = [n for n in graph.get_topo_sorted_nodes() if isinstance(n, VirtualActivationWeightsNode)]
|
|
@@ -261,12 +262,12 @@ def bops_kpi(mp_cfg: List[int],
|
|
|
261
262
|
return np.array(bops)
|
|
262
263
|
|
|
263
264
|
|
|
264
|
-
def
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
265
|
+
def _bops_utilization(mp_cfg: List[int],
|
|
266
|
+
graph: Graph,
|
|
267
|
+
fw_info: FrameworkInfo,
|
|
268
|
+
fw_impl: FrameworkImplementation) -> np.ndarray:
|
|
268
269
|
"""
|
|
269
|
-
Computes a
|
|
270
|
+
Computes a resource utilization vector with the respective bit-operations (BOPS) count for each configurable node,
|
|
270
271
|
according to the given mixed-precision configuration of an original graph.
|
|
271
272
|
|
|
272
273
|
Args:
|
|
@@ -281,19 +282,18 @@ def _bops_kpi(mp_cfg: List[int],
|
|
|
281
282
|
|
|
282
283
|
mp_nodes = graph.get_configurable_sorted_nodes_names(fw_info)
|
|
283
284
|
|
|
284
|
-
# Go over all nodes that should be taken into consideration when computing the BOPS
|
|
285
|
+
# Go over all nodes that should be taken into consideration when computing the BOPS utilization.
|
|
285
286
|
bops = []
|
|
286
287
|
for n in graph.get_topo_sorted_nodes():
|
|
287
288
|
if n.has_kernel_weight_to_quantize(fw_info):
|
|
288
289
|
# If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
|
|
289
290
|
incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
|
|
290
291
|
if len(incoming_edges) != 1:
|
|
291
|
-
Logger.critical(f"
|
|
292
|
-
|
|
292
|
+
Logger.critical(f"Unable to compute BOPS metric for node {n.name} due to multiple inputs.") # pragma: no cover
|
|
293
293
|
input_activation_node = incoming_edges[0].source_node
|
|
294
294
|
if len(graph.out_edges(input_activation_node)) > 1:
|
|
295
295
|
# In the case where the activation node has multiple outgoing edges
|
|
296
|
-
# we don't consider this edge in the BOPS
|
|
296
|
+
# we don't consider this edge in the BOPS utilization calculation
|
|
297
297
|
continue
|
|
298
298
|
|
|
299
299
|
input_activation_node_cfg = input_activation_node.candidates_quantization_cfg[_get_node_cfg_idx(input_activation_node, mp_cfg, mp_nodes)]
|
|
@@ -338,7 +338,7 @@ def _get_node_cfg_idx(node: BaseNode, mp_cfg: List[int], sorted_configurable_nod
|
|
|
338
338
|
|
|
339
339
|
def _get_origin_weights_node(n: BaseNode) -> BaseNode:
|
|
340
340
|
"""
|
|
341
|
-
In case we run a
|
|
341
|
+
In case we run a resource utilization computation on a virtual graph,
|
|
342
342
|
this method is used to retrieve the original node out of a virtual weights node,
|
|
343
343
|
|
|
344
344
|
Args:
|
|
@@ -358,7 +358,7 @@ def _get_origin_weights_node(n: BaseNode) -> BaseNode:
|
|
|
358
358
|
|
|
359
359
|
def _get_origin_activation_node(n: BaseNode) -> BaseNode:
|
|
360
360
|
"""
|
|
361
|
-
In case we run a
|
|
361
|
+
In case we run a resource utilization computation on a virtual graph,
|
|
362
362
|
this method is used to retrieve the original node out of a virtual activation node,
|
|
363
363
|
|
|
364
364
|
Args:
|
|
@@ -417,25 +417,25 @@ def _compute_node_activation_memory(n: BaseNode, node_nbits: int) -> float:
|
|
|
417
417
|
return node_output_size * node_nbits / BITS_TO_BYTES
|
|
418
418
|
|
|
419
419
|
|
|
420
|
-
class
|
|
420
|
+
class MpRuMetric(Enum):
|
|
421
421
|
"""
|
|
422
|
-
Defines
|
|
423
|
-
The enum values can be used to call a function on a set of arguments.
|
|
422
|
+
Defines resource utilization computation functions that can be used to compute the resource utilization for a given target
|
|
423
|
+
for a given mp config. The enum values can be used to call a function on a set of arguments.
|
|
424
424
|
|
|
425
|
-
WEIGHTS_SIZE - applies the
|
|
425
|
+
WEIGHTS_SIZE - applies the weights_size_utilization function
|
|
426
426
|
|
|
427
|
-
ACTIVATION_OUTPUT_SIZE - applies the
|
|
427
|
+
ACTIVATION_OUTPUT_SIZE - applies the activation_output_size_utilization function
|
|
428
428
|
|
|
429
|
-
TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the
|
|
429
|
+
TOTAL_WEIGHTS_ACTIVATION_SIZE - applies the total_weights_activation_utilization function
|
|
430
430
|
|
|
431
|
-
BOPS_COUNT - applies the
|
|
431
|
+
BOPS_COUNT - applies the bops_utilization function
|
|
432
432
|
|
|
433
433
|
"""
|
|
434
434
|
|
|
435
|
-
WEIGHTS_SIZE = partial(
|
|
436
|
-
ACTIVATION_OUTPUT_SIZE = partial(
|
|
437
|
-
TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(
|
|
438
|
-
BOPS_COUNT = partial(
|
|
435
|
+
WEIGHTS_SIZE = partial(weights_size_utilization)
|
|
436
|
+
ACTIVATION_OUTPUT_SIZE = partial(activation_output_size_utilization)
|
|
437
|
+
TOTAL_WEIGHTS_ACTIVATION_SIZE = partial(total_weights_activation_utilization)
|
|
438
|
+
BOPS_COUNT = partial(bops_utilization)
|
|
439
439
|
|
|
440
440
|
def __call__(self, *args):
|
|
441
441
|
return self.value(*args)
|