mct-nightly 2.3.0.20250512.625__py3-none-any.whl → 2.3.0.20250513.611__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/METADATA +1 -1
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/RECORD +23 -23
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/framework_implementation.py +6 -33
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +22 -3
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +8 -5
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +69 -58
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +82 -79
- model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py +32 -26
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -4
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +7 -0
- model_compression_toolkit/core/common/similarity_analyzer.py +1 -1
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +37 -73
- model_compression_toolkit/core/keras/keras_implementation.py +8 -45
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +7 -5
- model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +6 -5
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +46 -78
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +7 -9
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +12 -10
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +6 -41
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/WHEEL +0 -0
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.3.0.20250512.625.dist-info → mct_nightly-2.3.0.20250513.611.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional
 
 import numpy as np
 
@@ -70,17 +70,17 @@ class ConfigurableActivationQuantizer(BaseKerasInferableQuantizer):
         self.activation_quantizers = init_activation_quantizers(self.node_q_cfg)
         self.active_quantization_config_index = max_candidate_idx  # initialize with first config as default
 
-    def set_active_activation_quantizer(self, index: int):
+    def set_active_activation_quantizer(self, index: Optional[int]):
         """
        Set an index to use for the activation quantizer to return when requested.
 
        Args:
            index: Index of a candidate quantization configuration to use its quantized
-               version of the float weight.
+               version of the float weight, or None to disable quantization.
        """
 
-        assert index < len(self.node_q_cfg), f'Quantizer has {len(self.node_q_cfg)} ' \
-                                             f'possible nbits. Can not set index {index}'
+        assert index is None or index < len(self.node_q_cfg), f'Quantizer has {len(self.node_q_cfg)} ' \
+                                                              f'possible nbits. Can not set index {index}'
         self.active_quantization_config_index = index
 
     def __call__(self,
@@ -96,6 +96,8 @@ class ConfigurableActivationQuantizer(BaseKerasInferableQuantizer):
        Returns:
            Quantized activation tensor.
        """
+        if self.active_quantization_config_index is None:
+            return inputs.numpy()
         return self.activation_quantizers[self.active_quantization_config_index](inputs)
 
     def get_config(self) -> Dict[str, Any]: # pragma: no cover
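Taken together, these Keras hunks let `set_active_activation_quantizer(None)` pass the assertion and make `__call__` bypass quantization entirely. Below is a minimal standalone sketch of that pattern; `ToyConfigurableActivationQuantizer` and the fake quantizer lambdas are illustrative stand-ins, not the MCT class itself:

```python
from typing import Callable, List, Optional

import numpy as np


class ToyConfigurableActivationQuantizer:
    """Illustrative stand-in for a configurable activation quantizer with an optional bypass."""

    def __init__(self, quantizers: List[Callable[[np.ndarray], np.ndarray]], max_candidate_idx: int = 0):
        self.activation_quantizers = quantizers
        self.active_quantization_config_index: Optional[int] = max_candidate_idx

    def set_active_activation_quantizer(self, index: Optional[int]):
        # None now means "disable quantization" rather than tripping the range assert.
        assert index is None or index < len(self.activation_quantizers), \
            f'Quantizer has {len(self.activation_quantizers)} possible nbits. Can not set index {index}'
        self.active_quantization_config_index = index

    def __call__(self, inputs: np.ndarray) -> np.ndarray:
        if self.active_quantization_config_index is None:
            return inputs  # bypass: hand the float activation through untouched
        return self.activation_quantizers[self.active_quantization_config_index](inputs)


# Two fake candidates ("8-bit" and "4-bit") just to exercise the index switch and the None bypass.
quant = ToyConfigurableActivationQuantizer(
    quantizers=[lambda x: np.round(x * 127) / 127, lambda x: np.round(x * 7) / 7])
x = np.array([0.1, 0.49, 0.9])
quant.set_active_activation_quantizer(1)     # pick the 4-bit candidate
print(quant(x))
quant.set_active_activation_quantizer(None)  # disable quantization entirely
print(quant(x))                              # returns x as-is
```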
model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 from functools import partial
-from typing import Dict, Any, List
+from typing import Dict, Any, List, Optional
 
 from model_compression_toolkit.core.common.mixed_precision.configurable_quantizer_utils import \
     verify_candidates_descending_order, init_quantized_weights
@@ -88,17 +88,17 @@ class ConfigurableWeightsQuantizer(BaseKerasInferableQuantizer):
         self.active_quantization_config_index = self.max_candidate_idx
 
     def set_weights_bit_width_index(self,
-                                    index: int):
+                                    index: Optional[int]):
        """
        Change the "active" bitwidth index the configurable quantizer uses, so a different quantized weight
        will be used.
 
        Args:
-           index: Quantization configuration candidate index to use.
+           index: Quantization configuration candidate index to use, or None to disable quantization.
 
        """
 
-        if index >= len(self.node_q_cfg):
+        if index and index >= len(self.node_q_cfg):
            Logger.critical(f'Quantizer supports only {len(self.node_q_cfg)} bit width configurations; index {index} is out of range.')# pragma: no cover
        self.active_quantization_config_index = index
 
@@ -118,7 +118,8 @@ class ConfigurableWeightsQuantizer(BaseKerasInferableQuantizer):
            specific quantization configuration candidate (the candidate's index is the
            index that is in active_quantization_config_index the quantizer holds).
        """
-
+        if self.active_quantization_config_index is None:
+            return self.float_weights
         return self.quantized_weights[self.active_quantization_config_index]
 
     def get_config(self) -> Dict[str, Any]: # pragma: no cover
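The Keras weights quantizer gets the same treatment: `None` now means "serve the stored float weights". The shipped check uses a truthiness test (`if index and index >= ...`); the sketch below uses an explicit `index is not None` comparison for readability. Again, this is an illustrative stand-in rather than the MCT class:

```python
from typing import List, Optional

import numpy as np


class ToyConfigurableWeightsQuantizer:
    """Illustrative stand-in: holds pre-quantized weight candidates plus the original float weights."""

    def __init__(self, float_weights: np.ndarray, quantized_weights: List[np.ndarray]):
        self.float_weights = float_weights
        self.quantized_weights = quantized_weights
        self.active_quantization_config_index: Optional[int] = 0

    def set_weights_bit_width_index(self, index: Optional[int]):
        # Only validate the range when an index is actually given; None disables quantization.
        if index is not None and index >= len(self.quantized_weights):
            raise ValueError(f'Only {len(self.quantized_weights)} candidates; index {index} is out of range.')
        self.active_quantization_config_index = index

    def __call__(self) -> np.ndarray:
        if self.active_quantization_config_index is None:
            return self.float_weights  # bypass: return the original float kernel
        return self.quantized_weights[self.active_quantization_config_index]


w = np.array([0.3, -0.7])
q = ToyConfigurableWeightsQuantizer(float_weights=w, quantized_weights=[np.round(w * 127) / 127])
q.set_weights_bit_width_index(None)
assert q() is w  # float weights returned untouched when quantization is disabled
```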
model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py

@@ -16,26 +16,18 @@
 from typing import List, Any, Tuple, Union, Dict
 
 import torch
-from mct_quantizers import PytorchQuantizationWrapper,
-    PytorchActivationQuantizationHolder
+from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
 from mct_quantizers.common.constants import ACTIVATION_HOLDER_QUANTIZER
-from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
-from mct_quantizers.pytorch.quantizers import BasePyTorchInferableQuantizer
 
-from model_compression_toolkit.core import FrameworkInfo
-from model_compression_toolkit.core import common
+from model_compression_toolkit.core import FrameworkInfo, common
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.user_info import UserInformation
 from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
-
 from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
 from model_compression_toolkit.core.pytorch.mixed_precision.configurable_activation_quantizer import \
     ConfigurableActivationQuantizer
 from model_compression_toolkit.core.pytorch.mixed_precision.configurable_weights_quantizer import \
     ConfigurableWeightsQuantizer
-
-from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.node_to_quantizer import \
-    get_weights_inferable_quantizer_kwargs, get_activation_inferable_quantizer_kwargs
 from model_compression_toolkit.logger import Logger
 
 
@@ -77,44 +69,25 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
            n: A node of mct graph.
            layer: A pytorch layer
 
-        Returns:
-
-
+        Returns:
+            Wrapped layer with a configurable quantizer if the layer should be quantized in mixed precision, or the
+            layer as is.
 
+        Raises:
+            ValueError: if kernel attribute is quantized but not configurable.
        """
 
-        weights_conf_nodes_names = [n.name for n in self.graph.get_weights_configurable_nodes(self.fw_info)]
         kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
-        if kernel_attr is
-
-
-
-
-
-
-
-
-
-        # TODO: Do we want to include other quantized attributes that are not
-        # the kernel attribute in the mixed precision model?
-        # Currently, we only consider kernel attribute quantization (whether it is in mixed precision
-        # or single precision).
-        node_weights_qc = n.get_unique_weights_candidates(kernel_attr)
-        if not len(node_weights_qc) == 1:
-            Logger.critical(f"Expected a single weights quantization configuration for node '{n.name}', but found ({len(node_weights_qc)}) configurations.")# pragma: no cover
-
-        quantier_for_node = get_inferable_quantizer_class(QuantizationTarget.Weights,
-                                                          node_weights_qc[0].weights_quantization_cfg
-                                                          .get_attr_config(kernel_attr)
-                                                          .weights_quantization_method,
-                                                          BasePyTorchInferableQuantizer)
-        kwargs = get_weights_inferable_quantizer_kwargs(node_weights_qc[0].weights_quantization_cfg,
-                                                        kernel_attr)
-
-        return PytorchQuantizationWrapper(layer,
-                                          weights_quantizers={kernel_attr: quantier_for_node(**kwargs)})
-
-        return layer
+        if kernel_attr is None or not n.is_weights_quantization_enabled(kernel_attr):
+            return layer
+        if not n.is_configurable_weight(kernel_attr):  # pragma: no cover
+            raise ValueError(f'Weight wrapper is not expected to be created for non-configurable weight of node {n}.')
+        return PytorchQuantizationWrapper(layer,
+                                          weights_quantizers={
+                                              kernel_attr: ConfigurableWeightsQuantizer(
+                                                  **self._get_weights_configurable_quantizer_kwargs(n,
                                                                                                      kernel_attr),
                                                  kernel_attr=kernel_attr)})
 
     def _get_weights_configurable_quantizer_kwargs(self, n: BaseNode, attr: str) -> Dict[str, Any]:
        """
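The rewritten weight-wrapping path drops the single-precision inferable-quantizer branch: a layer is now either returned untouched (no kernel attribute, or weights quantization disabled) or wrapped with a ConfigurableWeightsQuantizer, and a quantized-but-non-configurable kernel is treated as an error. A hedged sketch of that decision flow, with the node queries abstracted into callables (the real checks are methods on MCT's BaseNode; the function name and parameters here are hypothetical):

```python
from typing import Any, Callable, Optional


def wrap_layer_for_mixed_precision(layer: Any,
                                   kernel_attr: Optional[str],
                                   weights_quantization_enabled: Callable[[str], bool],
                                   is_configurable_weight: Callable[[str], bool],
                                   make_wrapper: Callable[[Any, str], Any]) -> Any:
    """Sketch of the new wrapping decision: pass through, fail fast, or wrap with a configurable quantizer."""
    if kernel_attr is None or not weights_quantization_enabled(kernel_attr):
        return layer                         # nothing to quantize -> keep the layer as-is
    if not is_configurable_weight(kernel_attr):
        raise ValueError('Weight wrapper is not expected for a non-configurable weight.')
    return make_wrapper(layer, kernel_attr)  # e.g. PytorchQuantizationWrapper + ConfigurableWeightsQuantizer
```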
@@ -145,9 +118,8 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
 
     def mixed_precision_activation_holder(self, n: BaseNode, holder_type: PytorchActivationQuantizationHolder = PytorchActivationQuantizationHolder) -> PytorchActivationQuantizationHolder:
        """
-
-
-        or an inferable quantizer for fixed single bit-width quantization.
+        Builds PytorchActivationQuantizationHolder layer with a configurable quantizer for mixed precision for a node
+        with a configurable activation.
 
        Args:
            n: Node to get PytorchActivationQuantizationHolder to attach in its output.
@@ -155,39 +127,35 @@ class MixedPrecisionPyTorchModelBuilder(PyTorchModelBuilder):
 
        Returns:
            A PytorchActivationQuantizationHolder layer for the node activation quantization.
+
+        Raises:
+            ValueError: if node's activation is not configurable.
        """
+        if holder_type != PytorchActivationQuantizationHolder:  # pragma: no cover
+            raise TypeError(f'Expected PytorchActivationQuantizationHolder holder type for mixed precision, got'
+                            f'{holder_type}')
+
+        if not n.has_configurable_activation():  # pragma: no cover
+            raise ValueError(f'Activation holder is not expected to be created for a non-configurable activation of '
+                             f'node {n}')
+
+        num_of_outputs = len(n.output_shape) if isinstance(n.output_shape, list) else 1
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        max_candidate_idx = n.find_max_candidate_index()
-
-        kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
-        activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
-                                                                    'max_candidate_idx': max_candidate_idx,
-                                                                    'kernel_attr': kernel_attr})] \
-                                * num_of_outputs
-        else:
-            node_act_qc = n.get_unique_activation_candidates()
-            assert len(node_act_qc) == 1, f"Expected a single activation configuration for node '{n.name}', but found multiple ({len(node_act_qc)}) configurations."
-            quantizer_for_node = get_inferable_quantizer_class(QuantizationTarget.Activation,
-                                                               node_act_qc[0].activation_quantization_cfg.activation_quantization_method,
-                                                               BasePyTorchInferableQuantizer)
-            kwargs = get_activation_inferable_quantizer_kwargs(node_act_qc[0].activation_quantization_cfg)
-
-            activation_quantizers = [quantizer_for_node(**kwargs)] * num_of_outputs
+        node_q_cfg_candidates = n.candidates_quantization_cfg
+
+        # sorting the candidates by kernel attribute weights number of bits first and then by
+        # activation number of bits (in reversed order).
+        # since only kernel attribute is quantized in weights mixed precision,
+        # if the node doesn't have a kernel attribute, we only sort by activation_n_bits.
+        n.sort_node_candidates(self.fw_info)
+
+        max_candidate_idx = n.find_max_candidate_index()
+
+        kernel_attr = self.fw_info.get_kernel_op_attributes(n.type)[0]
+        activation_quantizers = [ConfigurableActivationQuantizer(**{'node_q_cfg': node_q_cfg_candidates,
+                                                                    'max_candidate_idx': max_candidate_idx,
+                                                                    'kernel_attr': kernel_attr})] \
+                                * num_of_outputs
 
         # Holder by definition uses a single quantizer for the activation quantization
         # thus we make sure this is the only possible case (unless it's a node with no activation
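Similarly, the activation-holder path now assumes a configurable activation and always builds a ConfigurableActivationQuantizer, replicated once per node output. A small sketch of just the replication step; the factory callable and output-shape handling are simplified assumptions, not MCT APIs:

```python
from typing import Any, Callable, List


def build_activation_quantizers(make_quantizer: Callable[[], Any], output_shape: Any) -> List[Any]:
    """Sketch: one configurable activation quantizer instance, shared across the node's outputs."""
    num_of_outputs = len(output_shape) if isinstance(output_shape, list) else 1
    # The diff replicates the same quantizer object with list multiplication; the holder ultimately
    # keeps a single quantizer, so the builder later verifies the list collapses to one entry.
    return [make_quantizer()] * num_of_outputs
```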
model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from typing import
+from typing import List, Optional
 
 from model_compression_toolkit.core.common.mixed_precision.configurable_quant_id import ConfigurableQuantizerIdentifier
 from model_compression_toolkit.core.common.mixed_precision.configurable_quantizer_utils import \
@@ -70,18 +70,15 @@ class ConfigurableActivationQuantizer(BasePyTorchInferableQuantizer):
         self.activation_quantizers = init_activation_quantizers(self.node_q_cfg)
         self.active_quantization_config_index = max_candidate_idx  # initialize with first config as default
 
-    def set_active_activation_quantizer(self,
-                                        index: int):
+    def set_active_activation_quantizer(self, index: Optional[int]):
        """
        Set an activation quantizer to use by the layer wrapped by the module.
 
        Args:
-           index: Index of a candidate quantization configuration to use
-               for quantizing the activation.
+           index: Index of a candidate quantization configuration to use, or None to disable quantization.
        """
-
-
-                                             f'possible nbits. Can not set index {index}'
+        assert index is None or index < len(self.node_q_cfg), (f'Quantizer has {len(self.node_q_cfg)} possible nbits. '
+                                                               f'Can not set index {index}')
         self.active_quantization_config_index = index
 
     def __call__(self,
@@ -97,5 +94,6 @@ class ConfigurableActivationQuantizer(BasePyTorchInferableQuantizer):
        Returns:
            Quantized activation tensor.
        """
-
+        if self.active_quantization_config_index is None:
+            return inputs
         return self.activation_quantizers[self.active_quantization_config_index](inputs)
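The PyTorch activation quantizer mirrors the Keras change, except that the bypass returns the input tensor directly rather than a NumPy array. An illustrative torch module sketch (a stand-in, not the MCT class):

```python
from typing import Callable, List, Optional

import torch


class ToyTorchActivationQuantizer(torch.nn.Module):
    """Illustrative stand-in: the PyTorch variant returns the input tensor itself when disabled."""

    def __init__(self, quantizers: List[Callable[[torch.Tensor], torch.Tensor]], max_candidate_idx: int = 0):
        super().__init__()
        self.activation_quantizers = quantizers
        self.active_quantization_config_index: Optional[int] = max_candidate_idx

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        if self.active_quantization_config_index is None:
            return inputs  # no conversion or detach needed for activations; the tensor flows through unchanged
        return self.activation_quantizers[self.active_quantization_config_index](inputs)
```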
model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import
+from typing import List, Optional
 
 from model_compression_toolkit.core.common.mixed_precision.configurable_quant_id import ConfigurableQuantizerIdentifier
 from model_compression_toolkit.core.common.mixed_precision.configurable_quantizer_utils import \
@@ -87,20 +87,18 @@ class ConfigurableWeightsQuantizer(BasePyTorchInferableQuantizer):
 
         self.active_quantization_config_index = self.max_candidate_idx
 
-    def set_weights_bit_width_index(self,
-                                    index: int):
+    def set_weights_bit_width_index(self, index:Optional[int]):
        """
        Change the "active" bitwidth index the configurable quantizer uses, so a different quantized weight
        will be used.
 
        Args:
-           index: Quantization configuration candidate index to use.
+           index: Quantization configuration candidate index to use, or None to disable quantization.
 
        """
 
-        assert index < len(self.node_q_cfg), \
-            f'Quantizer has {len(self.node_q_cfg)} '
-            f'possible nbits. Can not set index {index}'
+        assert index is None or index < len(self.node_q_cfg), \
+            f'Quantizer has {len(self.node_q_cfg)} possible nbits. Can not set index {index}'
         self.active_quantization_config_index = index
 
     def __call__(self,
@@ -112,12 +110,16 @@ class ConfigurableWeightsQuantizer(BasePyTorchInferableQuantizer):
        to the current active_quantization_config_index.
 
        Args:
-           inputs: Input tensor (
+           inputs: Input tensor (only used if quantization is disabled).
 
        Returns:
            Quantized weight, that was quantized using number of bits that is in a
            specific quantization configuration candidate (the candidate's index is the
-           index that is in active_quantization_config_index the quantizer holds)
+           index that is in active_quantization_config_index the quantizer holds),
+           or detached input if quantization is disabled.
        """
-
+        if self.active_quantization_config_index is None:
+            # Note: must be detached, otherwise quantization wrapper will inject it back as a Parameter to the
+            # underlying layer, which then causes crash during inference next time the quantizer is enabled
+            return inputs.detach()
         return self.quantized_weights[self.active_quantization_config_index]
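For weights, the PyTorch bypass returns `inputs.detach()`; per the in-diff comment, handing back the live tensor would let the quantization wrapper re-register it as a Parameter on the underlying layer and crash once the quantizer is re-enabled. A toy sketch of that call path (illustrative class and usage, not the MCT implementation):

```python
from typing import List, Optional

import torch


class ToyTorchWeightsQuantizer:
    """Illustrative stand-in for the PyTorch configurable weights quantizer bypass."""

    def __init__(self, quantized_weights: List[torch.Tensor]):
        self.quantized_weights = quantized_weights
        self.active_quantization_config_index: Optional[int] = 0

    def __call__(self, float_weight: torch.Tensor) -> torch.Tensor:
        if self.active_quantization_config_index is None:
            # Returning the raw parameter would hand the wrapper a tensor that still requires grad and
            # carries autograd history; detach() hands back a plain tensor instead.
            return float_weight.detach()
        return self.quantized_weights[self.active_quantization_config_index]


w = torch.nn.Parameter(torch.randn(3))
q = ToyTorchWeightsQuantizer(quantized_weights=[torch.round(w.detach() * 8) / 8])
q.active_quantization_config_index = None
out = q(w)
assert not out.requires_grad  # plain tensor, safe for the wrapper to reassign
```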
model_compression_toolkit/core/pytorch/pytorch_implementation.py

@@ -26,14 +26,12 @@ from torch.nn import Module, Sigmoid, Softmax
 
 import model_compression_toolkit.core.pytorch.constants as pytorch_constants
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
-from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
+from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
-from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
-from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import set_layer_to_bitwidth
+from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode
 from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
 from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
 from model_compression_toolkit.core.common.similarity_analyzer import compute_mse, compute_kl_divergence, compute_cs
@@ -112,6 +110,10 @@ class PytorchImplementation(FrameworkImplementation):
     """
     A class with implemented methods to support optimizing Pytorch models.
     """
+    weights_quant_layer_cls = PytorchQuantizationWrapper,
+    activation_quant_layer_cls = PytorchActivationQuantizationHolder
+    configurable_weights_quantizer_cls = ConfigurableWeightsQuantizer
+    configurable_activation_quantizer_cls = ConfigurableActivationQuantizer
 
     def __init__(self):
         super().__init__()
@@ -397,43 +399,6 @@ class PytorchImplementation(FrameworkImplementation):
         substitutions_list.append(pytorch_batchnorm_refusing())
         return substitutions_list
 
-    def get_sensitivity_evaluator(self,
-                                  graph: Graph,
-                                  quant_config: MixedPrecisionQuantizationConfig,
-                                  representative_data_gen: Callable,
-                                  fw_info: FrameworkInfo,
-                                  disable_activation_for_metric: bool = False,
-                                  hessian_info_service: HessianInfoService = None
-                                  ) -> SensitivityEvaluation:
-        """
-        Creates and returns an object which handles the computation of a sensitivity metric for a mixed-precision
-        configuration (comparing to the float model).
-
-        Args:
-            graph: Graph to build its float and mixed-precision models.
-            quant_config: QuantizationConfig of how the model should be quantized.
-            representative_data_gen: Dataset to use for retrieving images for the models inputs.
-            fw_info: FrameworkInfo object with information about the specific framework's model.
-            disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
-            hessian_info_service: HessianScoresService to fetch approximations of the hessian scores for the float model.
-
-        Returns:
-            A SensitivityEvaluation object.
-        """
-
-        return SensitivityEvaluation(graph=graph,
-                                     quant_config=quant_config,
-                                     representative_data_gen=representative_data_gen,
-                                     fw_info=fw_info,
-                                     fw_impl=self,
-                                     set_layer_to_bitwidth=partial(set_layer_to_bitwidth,
-                                                                   weights_quantizer_type=ConfigurableWeightsQuantizer,
-                                                                   activation_quantizer_type=ConfigurableActivationQuantizer,
-                                                                   weights_quant_layer_type=PytorchQuantizationWrapper,
-                                                                   activation_quant_layer_type=PytorchActivationQuantizationHolder),
-                                     disable_activation_for_metric=disable_activation_for_metric,
-                                     hessian_info_service=hessian_info_service)
-
     def get_node_prior_info(self,
                             node: BaseNode,
                             fw_info: FrameworkInfo,
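With `get_sensitivity_evaluator` deleted here (and framework_implementation.py trimmed by 33 lines per the file list), the new class attributes suggest the sensitivity-evaluation wiring moved into shared base-class code that reads these attributes instead of each framework duplicating the partial-binding boilerplate. The sketch below is an assumption about how such wiring could look, not the actual MCT base-class implementation; `FrameworkImplementationSketch` and `make_set_layer_to_bitwidth` are hypothetical names:

```python
from functools import partial
from typing import Any, Callable, Type


class FrameworkImplementationSketch:
    """Hypothetical base-class sketch that consumes the new per-framework class attributes."""
    weights_quant_layer_cls: Type = None
    activation_quant_layer_cls: Type = None
    configurable_weights_quantizer_cls: Type = None
    configurable_activation_quantizer_cls: Type = None

    def make_set_layer_to_bitwidth(self, set_layer_to_bitwidth: Callable[..., Any]) -> Callable[..., Any]:
        # The per-framework classes are bound once here, so Keras and PyTorch no longer need
        # duplicated get_sensitivity_evaluator overrides that only differed in these four types.
        return partial(set_layer_to_bitwidth,
                       weights_quantizer_type=self.configurable_weights_quantizer_cls,
                       activation_quantizer_type=self.configurable_activation_quantizer_cls,
                       weights_quant_layer_type=self.weights_quant_layer_cls,
                       activation_quant_layer_type=self.activation_quant_layer_cls)
```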