mct-nightly 2.2.0.20250113.527__py3-none-any.whl → 2.2.0.20250114.84821__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/RECORD +103 -105
- model_compression_toolkit/__init__.py +2 -2
- model_compression_toolkit/core/common/framework_info.py +1 -3
- model_compression_toolkit/core/common/fusion/layer_fusing.py +6 -5
- model_compression_toolkit/core/common/graph/base_graph.py +20 -21
- model_compression_toolkit/core/common/graph/base_node.py +44 -17
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +7 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -6
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +26 -135
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +36 -62
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +667 -0
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +25 -202
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +164 -470
- model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +30 -7
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +3 -5
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +2 -2
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +7 -6
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +0 -1
- model_compression_toolkit/core/common/pruning/pruner.py +5 -3
- model_compression_toolkit/core/common/quantization/bit_width_config.py +6 -12
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -2
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +1 -1
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +15 -14
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -1
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +1 -1
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +5 -5
- model_compression_toolkit/core/graph_prep_runner.py +12 -11
- model_compression_toolkit/core/keras/data_util.py +24 -5
- model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +1 -2
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +5 -6
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- model_compression_toolkit/core/pytorch/default_framework_info.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +1 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +4 -5
- model_compression_toolkit/core/runner.py +33 -60
- model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/metadata.py +11 -10
- model_compression_toolkit/pruning/keras/pruning_facade.py +5 -6
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +6 -7
- model_compression_toolkit/ptq/keras/quantization_facade.py +8 -9
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -9
- model_compression_toolkit/qat/keras/quantization_facade.py +5 -6
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantization_facade.py +5 -9
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/target_platform_capabilities/__init__.py +9 -0
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/schema/mct_current_schema.py +2 -2
- model_compression_toolkit/target_platform_capabilities/schema/schema_functions.py +18 -18
- model_compression_toolkit/target_platform_capabilities/schema/v1.py +13 -13
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/__init__.py +6 -6
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2fw.py +10 -10
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2keras.py +3 -3
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attach2pytorch.py +3 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/current_tpc.py +8 -8
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities.py → targetplatform2framework/framework_quantization_capabilities.py} +40 -40
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework/target_platform_capabilities_component.py → targetplatform2framework/framework_quantization_capabilities_component.py} +2 -2
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/layer_filter_params.py +0 -1
- model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/operations_to_layers.py +8 -8
- model_compression_toolkit/target_platform_capabilities/tpc_io_handler.py +24 -24
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +18 -18
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/{tp_model.py → tpc.py} +31 -32
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +4 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/{tp_model.py → tpc.py} +27 -27
- model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +1 -2
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +2 -1
- model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/lsq/symmetric_lsq.py +1 -2
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +1 -1
- model_compression_toolkit/xquant/common/model_folding_utils.py +7 -6
- model_compression_toolkit/xquant/keras/keras_report_utils.py +4 -4
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -105
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -33
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -23
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250113.527.dist-info → mct_nightly-2.2.0.20250114.84821.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/target_platform_capabilities/{target_platform/targetplatform2framework → targetplatform2framework}/attribute_filter.py +0 -0
@@ -12,29 +12,21 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
+
from dataclasses import dataclass
|
15
16
|
from enum import Enum
|
16
|
-
from typing import Dict, Any
|
17
|
+
from typing import Dict, Any, Set
|
17
18
|
|
18
19
|
import numpy as np
|
19
20
|
|
20
21
|
|
21
22
|
class RUTarget(Enum):
|
22
23
|
"""
|
23
|
-
|
24
|
-
For each target that we care to consider in a mixed-precision search, there should be defined a set of
|
25
|
-
resource utilization computation function, resource utilization aggregation function,
|
26
|
-
and resource utilization target (within a ResourceUtilization object).
|
27
|
-
|
28
|
-
Whenever adding a resource utilization metric to ResourceUtilization class we should add a matching target to this enum.
|
29
|
-
|
30
|
-
WEIGHTS - Weights memory ResourceUtilization metric.
|
31
|
-
|
32
|
-
ACTIVATION - Activation memory ResourceUtilization metric.
|
33
|
-
|
34
|
-
TOTAL - Total memory ResourceUtilization metric.
|
35
|
-
|
36
|
-
BOPS - Total Bit-Operations ResourceUtilization Metric.
|
24
|
+
Resource Utilization targets for mixed-precision search.
|
37
25
|
|
26
|
+
WEIGHTS - Weights memory.
|
27
|
+
ACTIVATION - Activation memory.
|
28
|
+
TOTAL - Total memory.
|
29
|
+
BOPS - Total Bit-Operations.
|
38
30
|
"""
|
39
31
|
|
40
32
|
WEIGHTS = 'weights'
|
@@ -43,34 +35,20 @@ class RUTarget(Enum):
|
|
43
35
|
BOPS = 'bops'
|
44
36
|
|
45
37
|
|
38
|
+
@dataclass
|
46
39
|
class ResourceUtilization:
|
47
40
|
"""
|
48
41
|
Class to represent measurements of performance.
|
49
|
-
"""
|
50
|
-
|
51
|
-
def __init__(self,
|
52
|
-
weights_memory: float = np.inf,
|
53
|
-
activation_memory: float = np.inf,
|
54
|
-
total_memory: float = np.inf,
|
55
|
-
bops: float = np.inf):
|
56
|
-
"""
|
57
|
-
|
58
|
-
Args:
|
59
|
-
weights_memory: Memory of a model's weights in bytes. Note that this includes only coefficients that should be quantized (for example, the kernel of Conv2D in Keras will be affected by this value, while the bias will not).
|
60
|
-
activation_memory: Memory of a model's activation in bytes, according to the given activation resource utilization metric.
|
61
|
-
total_memory: The sum of model's activation and weights memory in bytes, according to the given total resource utilization metric.
|
62
|
-
bops: The total bit-operations in the model.
|
63
|
-
"""
|
64
|
-
self.weights_memory = weights_memory
|
65
|
-
self.activation_memory = activation_memory
|
66
|
-
self.total_memory = total_memory
|
67
|
-
self.bops = bops
|
68
42
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
43
|
+
weights_memory: Memory of a model's weights in bytes.
|
44
|
+
activation_memory: Memory of a model's activation in bytes.
|
45
|
+
total_memory: The sum of model's activation and weights memory in bytes.
|
46
|
+
bops: The total bit-operations in the model.
|
47
|
+
"""
|
48
|
+
weights_memory: float = np.inf
|
49
|
+
activation_memory: float = np.inf
|
50
|
+
total_memory: float = np.inf
|
51
|
+
bops: float = np.inf
|
74
52
|
|
75
53
|
def weight_restricted(self):
|
76
54
|
return self.weights_memory < np.inf
|
@@ -93,34 +71,30 @@ class ResourceUtilization:
|
|
93
71
|
RUTarget.TOTAL: self.total_memory,
|
94
72
|
RUTarget.BOPS: self.bops}
|
95
73
|
|
96
|
-
def
|
74
|
+
def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool:
|
97
75
|
"""
|
98
|
-
|
76
|
+
Checks whether another ResourceUtilization object satisfies the constraints defined by the current object.
|
99
77
|
|
100
78
|
Args:
|
101
|
-
|
79
|
+
ru: A ResourceUtilization object to check against the current object.
|
102
80
|
|
81
|
+
Returns:
|
82
|
+
Whether all constraints are satisfied.
|
103
83
|
"""
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
84
|
+
return bool(ru.weights_memory <= self.weights_memory and \
|
85
|
+
ru.activation_memory <= self.activation_memory and \
|
86
|
+
ru.total_memory <= self.total_memory and \
|
87
|
+
ru.bops <= self.bops)
|
108
88
|
|
109
|
-
def
|
110
|
-
|
111
|
-
|
112
|
-
the current ResourceUtilization object.
|
89
|
+
def get_restricted_metrics(self) -> Set[RUTarget]:
|
90
|
+
d = self.get_resource_utilization_dict()
|
91
|
+
return {k for k, v in d.items() if v < np.inf}
|
113
92
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
Returns: True if all the given resource utilization values are not greater than the referenced resource utilization values.
|
93
|
+
def is_any_restricted(self) -> bool:
|
94
|
+
return bool(self.get_restricted_metrics())
|
118
95
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
ru.activation_memory <= self.activation_memory and \
|
125
|
-
ru.total_memory <= self.total_memory and \
|
126
|
-
ru.bops <= self.bops
|
96
|
+
def __repr__(self):
|
97
|
+
return f"Weights_memory: {self.weights_memory}, " \
|
98
|
+
f"Activation_memory: {self.activation_memory}, " \
|
99
|
+
f"Total_memory: {self.total_memory}, " \
|
100
|
+
f"BOPS: {self.bops}"
|