mct-nightly 2.3.0.20250602.610__py3-none-any.whl → 2.3.0.20250604.611__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/METADATA +2 -2
- {mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/RECORD +12 -13
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/__init__.py +2 -2
- model_compression_toolkit/core/common/mixed_precision/__init__.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +32 -5
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +21 -12
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +68 -65
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +6 -4
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/distance_weighting.py +0 -76
- {mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/WHEEL +0 -0
- {mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/top_level.txt +0 -0
{mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: mct-nightly
|
3
|
-
Version: 2.3.0.20250602.610
|
3
|
+
Version: 2.3.0.20250604.611
|
4
4
|
Summary: A Model Compression Toolkit for neural networks
|
5
5
|
Author-email: ssi-dnn-dev@sony.com
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
@@ -236,6 +236,6 @@ MCT is licensed under Apache License Version 2.0. By contributing to the project
|
|
236
236
|
|
237
237
|
[3] [TORCHVISION.MODELS](https://pytorch.org/vision/stable/models.html)
|
238
238
|
|
239
|
-
[4] Gordon, O., Cohen, E., Habi, H.
|
239
|
+
[4] Gordon, O., Cohen, E., Habi, H.V., Netzer, A. (2025). [EPTQ: Enhanced Post-Training Quantization via Hessian-guided Network-wise Optimization – ECCV 2024 Workshops](https://link.springer.com/chapter/10.1007/978-3-031-91979-4_13)
|
240
240
|
|
241
241
|
[5] Dikstein, L., Lapid, A., Netzer, A., & Habi, H. V., 2024. [Data Generation for Hardware-Friendly Post-Training Quantization, Accepted to IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2025](https://openaccess.thecvf.com/content/WACV2025/papers/Dikstein_Data_Generation_for_Hardware-Friendly_Post-Training_Quantization_WACV_2025_paper.pdf)
|
{mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/RECORD
RENAMED
@@ -1,11 +1,11 @@
|
|
1
|
-
mct_nightly-2.3.0.20250602.610.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
2
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
mct_nightly-2.3.0.20250604.611.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
2
|
+
model_compression_toolkit/__init__.py,sha256=MNjhJRHWg20p0VjHTzI6AhnDetVQ7Oy8YQrAOLRA-4Y,1557
|
3
3
|
model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
|
4
4
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
5
5
|
model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
|
6
6
|
model_compression_toolkit/metadata.py,sha256=x_Bk4VpzILdsFax6--CZ3X18qUTP28sbF_AhoQW8dNc,4003
|
7
7
|
model_compression_toolkit/verify_packages.py,sha256=l0neIRr8q_QwxmuiTI4vyCMDISDedK0EihjEQUe66tE,1319
|
8
|
-
model_compression_toolkit/core/__init__.py,sha256=
|
8
|
+
model_compression_toolkit/core/__init__.py,sha256=HNverPpoqEyFKTa7iEdOqqY2P0Gq-7GMejNOi6ZPcQs,2042
|
9
9
|
model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
|
10
10
|
model_compression_toolkit/core/graph_prep_runner.py,sha256=C6eUTd-fcgxk0LUbt51gFZwmyDDDEB8-9Q4kr9ujYvI,11555
|
11
11
|
model_compression_toolkit/core/quantization_prep_runner.py,sha256=DPevqQ8brkdut8K5f5v9g5lbT3r1GSmhLAk3NkL40Fg,6593
|
@@ -60,15 +60,15 @@ model_compression_toolkit/core/common/matchers/edge_matcher.py,sha256=bS9KIBhB6Y
|
|
60
60
|
model_compression_toolkit/core/common/matchers/function.py,sha256=kMwcinxn_PInvetNh_L_lqGXT1hoi3f97PqBpjqfXoA,1773
|
61
61
|
model_compression_toolkit/core/common/matchers/node_matcher.py,sha256=63cMwa5YbQ5LKZy8-KFmdchVc3N7mpDJ6fNDt_uAQsk,2745
|
62
62
|
model_compression_toolkit/core/common/matchers/walk_matcher.py,sha256=xqfLKk6xZt72hSnND_HoX5ESOooNMypb5VOZkVsJ_nw,1111
|
63
|
-
model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=
|
63
|
+
model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=Vlpo9M_1u6LHdEjYE3-wGc1esoH2NVhRzi3n_HTYvHs,789
|
64
64
|
model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256=npqLPyk5xXR11M_zdImtSALc5vJv9N4fEapaludKLBw,7139
|
65
65
|
model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,sha256=LLDguK7afsbN742ucLpmJr5TUfTyFpK1vbf2bpVr1v0,882
|
66
66
|
model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=7dKMi5S0zQZ16m8NWn1XIuoXsKuZUg64G4-uK8-j1PQ,5177
|
67
67
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=6pLUEEIqRTVIlCYQC4JIvY55KAvuBHEX8uTOQ-1Ac4Q,3859
|
68
|
-
model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=
|
68
|
+
model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=3vuhlpGvUdeGpEMfNIYONMN9NQ_VFIJykC6Gwlo-gaI,6728
|
69
69
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=axgAypzsiCOw04ZOtOEjK4riuNsaEU2qU6KkWnEXtMo,4951
|
70
70
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=1877xOUdgpWrXWyhdX1pJOePuopq43L71WqBFMqzyR4,6418
|
71
|
-
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=
|
71
|
+
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=3D_gHdcxsuINiTZqEAJXbxwYDg5qqXD51k4_smmWI9M,28553
|
72
72
|
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=MY8df-c_kITEr_7hOctaxhdiq29hSTA0La9Qo0oTJJY,9678
|
73
73
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
74
74
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=PKkhc5q8pEPnNLXwo3U56EOCfYnPXIvPs0LlCGZOoKU,4426
|
@@ -77,8 +77,7 @@ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools
|
|
77
77
|
model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
|
78
78
|
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=6Z6nQL9UH7B8dbcUR0cuCTEYFOKZAlvOb-SCk_cAZFA,6670
|
79
79
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/__init__.py,sha256=5yxITHNJcCfeGKdIpAYbNbKDoXUSvENuRQm3OQu8Qf4,697
|
80
|
-
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/
|
81
|
-
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py,sha256=W4CySFtN874npcM9j9wu1PVrv7IZHLyKdLOPrTsCNQg,22209
|
80
|
+
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py,sha256=WhSsQDsmunfYlAU34-YUYnQXEiPBFf2Gl4FnyCn-3LE,21966
|
82
81
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py,sha256=5l0qP0mZ061xh3rjqTJZcLD2mMKC-hfSnNAN0OmSusk,8938
|
83
82
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/set_layer_to_bitwidth.py,sha256=Zn6SgzGLWWKmuYGHd1YtKxZdYnQWRDeXEkKlBiTbHcs,2929
|
84
83
|
model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
|
@@ -135,7 +134,7 @@ model_compression_toolkit/core/common/statistics_correction/apply_activation_bia
|
|
135
134
|
model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py,sha256=b5clhUWGoDaQLn2pDCeYkV0FomVebcKS8pMXtQTTzIg,4679
|
136
135
|
model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py,sha256=C_nwhhitTd1pCto0nHZPn3fjIMOeDD7VIciumTR3s6k,5641
|
137
136
|
model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py,sha256=zIkhOPF6K5aIgMExpD7HFT9UZSDpvXh51F6V-qZ7H-4,9048
|
138
|
-
model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py,sha256=
|
137
|
+
model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py,sha256=Jditohcz1mGEYZTqNlzNyoRMsjZx0hsz1gkN1xux_HI,10596
|
139
138
|
model_compression_toolkit/core/common/statistics_correction/statistics_correction.py,sha256=E0ZA4edimJwpHh9twI5gafcoJ9fX5F1JX2QUOkUOKEw,6250
|
140
139
|
model_compression_toolkit/core/common/substitutions/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
|
141
140
|
model_compression_toolkit/core/common/substitutions/apply_substitutions.py,sha256=k-bifmakHIYZeZS-4T1QpZ1Et6AwAijMRgAKs7hmMKc,1390
|
@@ -530,7 +529,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
|
|
530
529
|
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
|
531
530
|
model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
|
532
531
|
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
|
533
|
-
mct_nightly-2.3.0.
|
534
|
-
mct_nightly-2.3.0.
|
535
|
-
mct_nightly-2.3.0.
|
536
|
-
mct_nightly-2.3.0.
|
532
|
+
mct_nightly-2.3.0.20250604.611.dist-info/METADATA,sha256=vtgFVscrMOf1owGEtgvDKYIE2oCQ2ewOg0-JbeDPI0k,25087
|
533
|
+
mct_nightly-2.3.0.20250604.611.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
534
|
+
mct_nightly-2.3.0.20250604.611.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
535
|
+
mct_nightly-2.3.0.20250604.611.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.3.0.20250602.000610"
|
30
|
+
__version__ = "2.3.0.20250604.000611"
|
@@ -22,8 +22,8 @@ from model_compression_toolkit.core.common.quantization.quantization_config impo
|
|
22
22
|
from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
|
23
23
|
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
|
24
24
|
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
|
25
|
-
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import
|
25
|
+
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import (
|
26
|
+
MixedPrecisionQuantizationConfig, MpDistanceWeighting, MpMetricNormalization)
|
26
27
|
from model_compression_toolkit.core.keras.resource_utilization_data_facade import keras_resource_utilization_data
|
27
28
|
from model_compression_toolkit.core.pytorch.resource_utilization_data_facade import pytorch_resource_utilization_data
|
28
|
-
from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.distance_weighting import MpDistanceWeighting
|
29
29
|
|
@@ -12,4 +12,4 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
-
from .
|
15
|
+
from .mixed_precision_quantization_config import MpDistanceWeighting, MpMetricNormalization
|
model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py
CHANGED
@@ -14,10 +14,25 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
|
16
16
|
from dataclasses import dataclass, field
|
17
|
-
from enum import Enum
|
17
|
+
from enum import Enum, auto
|
18
18
|
from typing import List, Callable, Optional
|
19
|
+
|
19
20
|
from model_compression_toolkit.constants import MP_DEFAULT_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
|
20
|
-
|
21
|
+
|
22
|
+
|
23
|
+
class MpDistanceWeighting(Enum):
|
24
|
+
"""
|
25
|
+
Defines interest points distances weighting methods.
|
26
|
+
|
27
|
+
AVG - take the average distance over all interest points.
|
28
|
+
LAST_LAYER - take only the distance of the last interest point.
|
29
|
+
EXP - weighted average with weights based on exponent of negative distances between activations of the quantized and the float models.
|
30
|
+
HESSIAN - weighted average with Hessians as weights.
|
31
|
+
"""
|
32
|
+
AVG = auto()
|
33
|
+
LAST_LAYER = auto()
|
34
|
+
EXP = auto()
|
35
|
+
HESSIAN = auto()
|
21
36
|
|
22
37
|
|
23
38
|
class MpMetricNormalization(Enum):
|
@@ -38,11 +53,12 @@ class MixedPrecisionQuantizationConfig:
|
|
38
53
|
|
39
54
|
Args:
|
40
55
|
compute_distance_fn (Callable): Function to compute a distance between two tensors. If None, using pre-defined distance methods based on the layer type for each layer.
|
41
|
-
distance_weighting_method (MpDistanceWeighting):
|
56
|
+
distance_weighting_method (MpDistanceWeighting): distance weighting method to use. By default, MpDistanceWeighting.AVG.
|
42
57
|
num_of_images (int): Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.
|
43
58
|
configuration_overwrite (List[int]): A list of integers that enables overwrite of mixed precision with a predefined one.
|
44
59
|
num_interest_points_factor (float): A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.
|
45
|
-
use_hessian_based_scores (bool): Whether to use Hessian-based scores for weighted average distance metric
|
60
|
+
use_hessian_based_scores (bool): Whether to use Hessian-based scores for weighted average distance metric
|
61
|
+
computation. This is identical to passing distance_weighting_method=MpDistanceWeighting.HESSIAN.
|
46
62
|
norm_scores (bool): Whether to normalize the returned scores for the weighted distance metric (to get values between 0 and 1).
|
47
63
|
refine_mp_solution (bool): Whether to try to improve the final mixed-precision configuration using a greedy algorithm that searches layers to increase their bit-width, or not.
|
48
64
|
metric_normalization_threshold (float): A threshold for checking the mixed precision distance metric values, In case of values larger than this threshold, the metric will be scaled to prevent numerical issues.
|
@@ -51,12 +67,14 @@ class MixedPrecisionQuantizationConfig:
|
|
51
67
|
metric_epsilon (float | None): ensure minimal distance between the metric for any non-max-bitwidth candidate
|
52
68
|
and a max-bitwidth candidate, i.e. metric(non-max-bitwidth) >= metric(max-bitwidth) + epsilon.
|
53
69
|
If none, the computed metrics are used as is.
|
70
|
+
exp_distance_weighting_sigma (float): sigma for exponential weighting method. A distance for each interest point
|
71
|
+
is normalized by sigma prior to applying exponent.
|
54
72
|
custom_metric_fn (Callable): Function to compute a custom metric. As input gets the model_mp and returns a
|
55
73
|
float value for metric. If None, uses interest point metric.
|
56
74
|
|
57
75
|
"""
|
58
76
|
compute_distance_fn: Optional[Callable] = None
|
59
|
-
distance_weighting_method: MpDistanceWeighting =
|
77
|
+
distance_weighting_method: MpDistanceWeighting = None
|
60
78
|
num_of_images: int = MP_DEFAULT_NUM_SAMPLES
|
61
79
|
configuration_overwrite: Optional[List[int]] = None
|
62
80
|
num_interest_points_factor: float = field(default=1.0, metadata={"description": "Should be between 0.0 and 1.0"})
|
@@ -67,6 +85,7 @@ class MixedPrecisionQuantizationConfig:
|
|
67
85
|
hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
|
68
86
|
metric_normalization: MpMetricNormalization = MpMetricNormalization.NONE
|
69
87
|
metric_epsilon: Optional[float] = 1e-6
|
88
|
+
exp_distance_weighting_sigma: float = 0.1
|
70
89
|
custom_metric_fn: Optional[Callable] = None
|
71
90
|
_is_mixed_precision_enabled: bool = field(init=False, default=False)
|
72
91
|
|
@@ -77,6 +96,14 @@ class MixedPrecisionQuantizationConfig:
|
|
77
96
|
"the base set of interest points that are required to be " \
|
78
97
|
"used for mixed-precision metric evaluation, " \
|
79
98
|
"thus, it should be between 0 to 1"
|
99
|
+
if self.use_hessian_based_scores:
|
100
|
+
assert self.distance_weighting_method in [None, MpDistanceWeighting.HESSIAN], \
|
101
|
+
f'Distance method {self.distance_weighting_method} is incompatible with use_hessian_based_scores=True'
|
102
|
+
self.distance_weighting_method = MpDistanceWeighting.HESSIAN
|
103
|
+
elif self.distance_weighting_method is None and self.custom_metric_fn is None:
|
104
|
+
self.distance_weighting_method = MpDistanceWeighting.AVG
|
105
|
+
assert self.exp_distance_weighting_sigma > 0, (f'exp_distance_weighting_sigma should be positive, but got '
|
106
|
+
f'{self.exp_distance_weighting_sigma}')
|
80
107
|
|
81
108
|
def set_mixed_precision_enable(self):
|
82
109
|
"""
|
@@ -171,8 +171,6 @@ class MixedPrecisionSearchManager:
|
|
171
171
|
else: # pragma: no cover
|
172
172
|
raise ValueError(f'Unexpected MpMetricNormalization mode {norm_method}')
|
173
173
|
normalized_metrics = node_candidates_metrics / node_candidates_metrics[ref_ind]
|
174
|
-
if verbose and not np.array_equal(normalized_metrics, node_candidates_metrics):
|
175
|
-
print(f'{"normalized metric:":25}', candidates_sensitivity)
|
176
174
|
return normalized_metrics
|
177
175
|
|
178
176
|
def ensure_maxbit_minimal_metric(node_candidates_metrics, max_ind):
|
@@ -183,30 +181,41 @@ class MixedPrecisionSearchManager:
|
|
183
181
|
max_val = node_candidates_metrics[max_ind]
|
184
182
|
metrics = np.maximum(node_candidates_metrics, max_val + eps)
|
185
183
|
metrics[max_ind] = max_val
|
186
|
-
if verbose and not np.array_equal(metrics, node_candidates_metrics):
|
187
|
-
print(f'{"eps-adjusted metric:":25}', candidates_sensitivity)
|
188
184
|
return metrics
|
189
185
|
|
190
|
-
layer_to_metrics_mapping =
|
186
|
+
layer_to_metrics_mapping = {}
|
187
|
+
debug_mapping = {}
|
191
188
|
for node_idx, node in tqdm(enumerate(self.mp_topo_configurable_nodes)):
|
192
|
-
|
189
|
+
raw_candidates_sensitivity = np.empty(len(node.candidates_quantization_cfg))
|
193
190
|
for bitwidth_idx, _ in enumerate(node.candidates_quantization_cfg):
|
194
191
|
if self.using_virtual_graph:
|
195
192
|
a_cfg, w_cfg = self.config_reconstructor.reconstruct_separate_aw_configs({node: bitwidth_idx})
|
196
193
|
else:
|
197
194
|
a_cfg = {node: bitwidth_idx} if node.has_configurable_activation() else {}
|
198
195
|
w_cfg = {node: bitwidth_idx} if node.has_any_configurable_weight() else {}
|
199
|
-
|
196
|
+
raw_candidates_sensitivity[bitwidth_idx] = self.sensitivity_evaluator.compute_metric(
|
200
197
|
mp_a_cfg={n.name: ind for n, ind in a_cfg.items()},
|
201
198
|
mp_w_cfg={n.name: ind for n, ind in w_cfg.items()}
|
202
199
|
)
|
203
|
-
if verbose:
|
204
|
-
print(f'{node.name}\n{"raw metric:":25}', candidates_sensitivity)
|
205
200
|
max_ind = node.find_max_candidate_index()
|
206
|
-
|
207
|
-
candidates_sensitivity = ensure_maxbit_minimal_metric(
|
201
|
+
normalized_sensitivity = normalize(raw_candidates_sensitivity, max_ind)
|
202
|
+
candidates_sensitivity = ensure_maxbit_minimal_metric(normalized_sensitivity, max_ind)
|
208
203
|
layer_to_metrics_mapping[node] = candidates_sensitivity
|
209
204
|
|
205
|
+
if verbose: # pragma: no cover
|
206
|
+
debug_mapping[node] = {'': candidates_sensitivity}
|
207
|
+
if np.any(raw_candidates_sensitivity != candidates_sensitivity):
|
208
|
+
debug_mapping[node]['normalized'] = normalized_sensitivity
|
209
|
+
debug_mapping[node]['raw '] = raw_candidates_sensitivity
|
210
|
+
|
211
|
+
if verbose: # pragma: no cover
|
212
|
+
np.set_printoptions(precision=8, floatmode='maxprec')
|
213
|
+
name_len = max(len(n.name) for n in debug_mapping)
|
214
|
+
s = '\nMETRIC BEGIN'
|
215
|
+
for n, d in debug_mapping.items():
|
216
|
+
s += (f'\n{n.name:{name_len}}' + f'\n{" ":{name_len-10}}'.join([f'{k} {v}' for k, v in d.items()]))
|
217
|
+
s += '\nMETRIC END'
|
218
|
+
Logger.info(s)
|
210
219
|
# Finalize distance metric mapping
|
211
220
|
self._finalize_distance_metric(layer_to_metrics_mapping)
|
212
221
|
|
@@ -372,7 +381,7 @@ class ConfigReconstructionHelper:
|
|
372
381
|
w_candidates = orig_nodes_w_candidates[orig_node]
|
373
382
|
# find the common candidate
|
374
383
|
common_candidates = set(a_candidates).intersection(set(w_candidates))
|
375
|
-
if len(common_candidates) != 1:
|
384
|
+
if len(common_candidates) != 1: # pragma: no cover
|
376
385
|
raise ValueError(f'Expected to find exactly one candidate with the required activation and weights '
|
377
386
|
f'quantization configuration for node {orig_node}. Found {len(common_candidates)}')
|
378
387
|
# in theory it's possible that original non-configurable node gets split and each part is combined
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py
CHANGED
@@ -15,7 +15,7 @@
|
|
15
15
|
import numpy as np
|
16
16
|
from typing import runtime_checkable, Protocol, Callable, Any, List, Tuple
|
17
17
|
|
18
|
-
from model_compression_toolkit.core import MixedPrecisionQuantizationConfig,
|
18
|
+
from model_compression_toolkit.core import FrameworkInfo, MixedPrecisionQuantizationConfig, MpDistanceWeighting
|
19
19
|
from model_compression_toolkit.core.common import Graph, BaseNode
|
20
20
|
from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
|
21
21
|
HessianScoresGranularity
|
@@ -32,7 +32,7 @@ class MetricCalculator(Protocol):
|
|
32
32
|
|
33
33
|
def compute(self, mp_model) -> float:
|
34
34
|
""" Compute the metric for the given model. """
|
35
|
-
|
35
|
+
raise NotImplementedError # pragma: no cover
|
36
36
|
|
37
37
|
|
38
38
|
class CustomMetricCalculator(MetricCalculator):
|
@@ -66,16 +66,6 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
66
66
|
fw_impl: Any,
|
67
67
|
hessian_info_service: HessianInfoService = None):
|
68
68
|
"""
|
69
|
-
Initiates all relevant objects to manage a sensitivity evaluation for MP search.
|
70
|
-
Create an object that allows to compute the sensitivity metric of an MP model (the sensitivity
|
71
|
-
is computed based on the similarity of the interest points' outputs between the MP model
|
72
|
-
and the float model).
|
73
|
-
First, we initiate a SensitivityEvaluationManager that handles the components which are necessary for
|
74
|
-
evaluating the sensitivity. It initializes an MP model (a model where layers that can be configured in
|
75
|
-
different bit-widths) and a baseline model (a float model).
|
76
|
-
Then, and based on the outputs of these two models (for some batches from the representative_data_gen),
|
77
|
-
we build a function to measure the sensitivity of a change in a bit-width of a model's layer.
|
78
|
-
|
79
69
|
Args:
|
80
70
|
graph: Graph to search for its MP configuration.
|
81
71
|
mp_config: MP Quantization configuration for how the graph should be quantized.
|
@@ -91,11 +81,9 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
91
81
|
self.fw_info = fw_info
|
92
82
|
self.fw_impl = fw_impl
|
93
83
|
|
94
|
-
if self.mp_config.
|
95
|
-
|
96
|
-
|
97
|
-
f"When using Hessian-based approximations for sensitivity evaluation, a valid HessianInfoService object is required; found {type(hessian_info_service)}.")
|
98
|
-
self.hessian_info_service = hessian_info_service
|
84
|
+
if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN:
|
85
|
+
assert hessian_info_service is not None, ('Expected HessianInfoService object to be passed with Hessian '
|
86
|
+
'distance weighting')
|
99
87
|
|
100
88
|
self.sorted_configurable_nodes_names = graph.get_configurable_sorted_nodes_names(self.fw_info)
|
101
89
|
|
@@ -104,17 +92,12 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
104
92
|
self.interest_points = self.get_mp_interest_points(graph,
|
105
93
|
fw_impl.count_node_for_mixed_precision_interest_points,
|
106
94
|
mp_config.num_interest_points_factor)
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
use_normalized_mse = self.mp_config.use_hessian_based_scores is False
|
111
|
-
self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points,
|
112
|
-
use_normalized_mse)
|
113
|
-
|
95
|
+
# exponential weighting assumes normalized distances, we only store it to be able to catch any changes
|
96
|
+
self.use_normalized_mse = True
|
97
|
+
self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points)
|
114
98
|
output_points = self.get_output_nodes_for_metric(graph)
|
115
99
|
self.all_interest_points = self.interest_points + output_points
|
116
|
-
self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(output_points
|
117
|
-
use_normalized_mse)
|
100
|
+
self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(output_points)
|
118
101
|
|
119
102
|
self.ref_model, _ = fw_impl.model_builder(graph, mode=ModelBuilderMode.FLOAT,
|
120
103
|
append2output=self.all_interest_points)
|
@@ -135,12 +118,10 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
135
118
|
# Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
|
136
119
|
self.baseline_tensors_list = self._init_baseline_tensors_list()
|
137
120
|
|
138
|
-
#
|
139
|
-
# and assigning distance_weighting method accordingly.
|
121
|
+
# Hessian-based scores for weighted average distance metric computation
|
140
122
|
self.interest_points_hessians = None
|
141
|
-
if self.mp_config.
|
142
|
-
self.interest_points_hessians = self._compute_hessian_based_scores()
|
143
|
-
self.mp_config.distance_weighting_method = lambda d: self.interest_points_hessians
|
123
|
+
if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN:
|
124
|
+
self.interest_points_hessians = self._compute_hessian_based_scores(hessian_info_service)
|
144
125
|
|
145
126
|
def compute(self, mp_model) -> float:
|
146
127
|
"""
|
@@ -153,13 +134,11 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
153
134
|
Computed metric.
|
154
135
|
"""
|
155
136
|
ipts_distances, out_pts_distances = self._compute_distance(mp_model)
|
156
|
-
sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances
|
157
|
-
self.mp_config.distance_weighting_method)
|
137
|
+
sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances)
|
158
138
|
return sensitivity_metric
|
159
139
|
|
160
140
|
def _init_metric_points_lists(self,
|
161
|
-
points: List[BaseNode],
|
162
|
-
norm_mse: bool = False) -> Tuple[List[Callable], List[int]]:
|
141
|
+
points: List[BaseNode]) -> Tuple[List[Callable], List[int]]:
|
163
142
|
"""
|
164
143
|
Initiates required lists for future use when computing the sensitivity metric.
|
165
144
|
Each point on which the metric is computed uses a dedicated distance function based on its type.
|
@@ -167,7 +146,6 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
167
146
|
|
168
147
|
Args:
|
169
148
|
points: The set of nodes in the graph for which we need to initiate the lists.
|
170
|
-
norm_mse: whether to normalize mse distance function.
|
171
149
|
|
172
150
|
Returns: A list of distance functions and a list of axes, with one entry per node.
|
173
151
|
|
@@ -177,7 +155,7 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
177
155
|
for n in points:
|
178
156
|
distance_fn, axis = self.fw_impl.get_mp_node_distance_fn(n,
|
179
157
|
compute_distance_fn=self.mp_config.compute_distance_fn,
|
180
|
-
norm_mse=
|
158
|
+
norm_mse=self.use_normalized_mse)
|
181
159
|
distance_fns_list.append(distance_fn)
|
182
160
|
# Axis is needed only for KL Divergence calculation, otherwise we use per-tensor computation
|
183
161
|
axis_list.append(axis if distance_fn == compute_kl_divergence else None)
|
@@ -190,12 +168,14 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
190
168
|
return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.ref_model, images))
|
191
169
|
for images in self.images_batches]
|
192
170
|
|
193
|
-
def _compute_hessian_based_scores(self) -> np.ndarray:
|
171
|
+
def _compute_hessian_based_scores(self, hessian_info_service: HessianInfoService) -> np.ndarray:
|
194
172
|
"""
|
195
173
|
Compute Hessian-based scores for each interest point.
|
174
|
+
Args:
|
175
|
+
hessian_info_service: Hessian service.
|
196
176
|
|
197
|
-
Returns:
|
198
|
-
|
177
|
+
Returns:
|
178
|
+
A vector of scores, one for each interest point, to be used for the distance metric weighted average computation.
|
199
179
|
|
200
180
|
"""
|
201
181
|
# Create a request for Hessian approximation scores with specific configurations
|
@@ -209,12 +189,16 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
209
189
|
n_samples=self.mp_config.num_of_images)
|
210
190
|
|
211
191
|
# Fetch the Hessian approximation scores for the current interest point
|
212
|
-
nodes_approximations =
|
192
|
+
nodes_approximations = hessian_info_service.fetch_hessian(request=hessian_info_request)
|
213
193
|
approx_by_image = np.stack([nodes_approximations[n.name] for n in self.interest_points],
|
214
194
|
axis=1) # samples X nodes
|
215
195
|
|
216
196
|
# Return the mean approximation value across all images for each interest point
|
217
|
-
|
197
|
+
scores = np.mean(approx_by_image, axis=0)
|
198
|
+
if scores.ndim == 2 and scores.shape[1] == 1:
|
199
|
+
scores = np.squeeze(scores, 1)
|
200
|
+
assert scores.ndim == 1, f'Expected a vector of hessians, got tensor of shape {scores.shape}'
|
201
|
+
return scores
|
218
202
|
|
219
203
|
def _compute_points_distance(self,
|
220
204
|
baseline_tensors: List[Any],
|
@@ -282,39 +266,58 @@ class DistanceMetricCalculator(MetricCalculator):
|
|
282
266
|
|
283
267
|
return ipts_distances, out_pts_distances
|
284
268
|
|
285
|
-
|
286
|
-
def _compute_mp_distance_measure(ipts_distances: np.ndarray,
|
287
|
-
out_pts_distances: np.ndarray,
|
288
|
-
metrics_weights_fn: Callable) -> float:
|
269
|
+
def _compute_mp_distance_measure(self, ipts_distances: np.ndarray, out_pts_distances: np.ndarray) -> float:
|
289
270
|
"""
|
290
271
|
Computes the final distance value out of a distance matrix.
|
291
272
|
|
292
273
|
Args:
|
293
|
-
ipts_distances: A matrix that contains the distances between the
|
294
|
-
for each interest point.
|
295
|
-
out_pts_distances: A matrix that contains the distances between the
|
296
|
-
for each output point.
|
297
|
-
metrics_weights_fn: A callable that produces the scores to compute weighted distance for interest points.
|
274
|
+
ipts_distances: A matrix that contains the distances between the reference and MP models
|
275
|
+
for each interest point, of shape (num interest points, num samples,).
|
276
|
+
out_pts_distances: A matrix that contains the distances between the reference and MP models
|
277
|
+
for each output point, of shape (num output points, num samples,).
|
298
278
|
|
299
|
-
Returns:
|
279
|
+
Returns:
|
280
|
+
Distance value.
|
300
281
|
"""
|
301
|
-
|
302
|
-
if len(ipts_distances) > 0:
|
303
|
-
mean_distance_per_layer = ipts_distances.mean(axis=1)
|
282
|
+
assert ipts_distances.size + out_pts_distances.size, 'Both interest and output points distances are empty.'
|
304
283
|
|
305
|
-
|
306
|
-
weight_scores = metrics_weights_fn(ipts_distances)
|
307
|
-
weight_scores = np.asarray(weight_scores) if isinstance(weight_scores, List) else weight_scores
|
308
|
-
weight_scores = weight_scores.flatten()
|
284
|
+
ipts_metric = self._compute_ipts_distance_measure(ipts_distances) if ipts_distances.size else 0
|
309
285
|
|
310
|
-
|
286
|
+
out_pts_metric = out_pts_distances.mean() if out_pts_distances.size else 0
|
311
287
|
|
312
|
-
|
313
|
-
if len(out_pts_distances) > 0:
|
314
|
-
mean_distance_per_output = out_pts_distances.mean(axis=1)
|
315
|
-
mean_output_distance = np.average(mean_distance_per_output)
|
288
|
+
return ipts_metric + out_pts_metric
|
316
289
|
|
317
|
-
|
290
|
+
def _compute_ipts_distance_measure(self, ipts_distances: np.ndarray) -> float:
|
291
|
+
"""
|
292
|
+
Compute distance measure for interest points.
|
293
|
+
|
294
|
+
Args:
|
295
|
+
ipts_distances: a matrix of shape (num interest points, num samples,).
|
296
|
+
|
297
|
+
Returns:
|
298
|
+
Distance measure.
|
299
|
+
"""
|
300
|
+
assert ipts_distances.ndim == 2, (f'Expected ipts_distances of shape shape (num interest points, num samples), '
|
301
|
+
f'got {ipts_distances.shape}')
|
302
|
+
method = self.mp_config.distance_weighting_method
|
303
|
+
if method == MpDistanceWeighting.AVG:
|
304
|
+
return ipts_distances.mean()
|
305
|
+
if method == MpDistanceWeighting.LAST_LAYER:
|
306
|
+
return ipts_distances[-1, :].mean()
|
307
|
+
if method == MpDistanceWeighting.HESSIAN:
|
308
|
+
return np.average(ipts_distances.mean(axis=1), weights=self.interest_points_hessians)
|
309
|
+
if method == MpDistanceWeighting.EXP:
|
310
|
+
assert self.use_normalized_mse
|
311
|
+
ipts_mean_distances = ipts_distances.mean(axis=1)
|
312
|
+
weights = 1 - np.exp(-ipts_mean_distances / self.mp_config.exp_distance_weighting_sigma)
|
313
|
+
if np.any(weights):
|
314
|
+
return np.average(ipts_mean_distances, weights=weights)
|
315
|
+
else:
|
316
|
+
Logger.warning('All weights for interest points are 0. If distances are very small, you might need to '
|
317
|
+
'pass a smaller exp_distance_weighting_sigma.')
|
318
|
+
return 0
|
319
|
+
|
320
|
+
raise ValueError(f'Unexpected MpDistanceWeighting {method}') # pragma: no cover
|
318
321
|
|
319
322
|
def _get_images_batches(self, num_of_images: int) -> List[Any]:
|
320
323
|
"""
|
model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py
CHANGED
@@ -115,7 +115,8 @@ def _compute_bias_correction(kernel: np.ndarray,
|
|
115
115
|
quantized_kernel: np.ndarray,
|
116
116
|
in_statistics_container: BaseStatsCollector,
|
117
117
|
output_channels_axis: int,
|
118
|
-
input_channels_axis: int
|
118
|
+
input_channels_axis: int,
|
119
|
+
node_name: str) -> Any:
|
119
120
|
"""
|
120
121
|
Compute the bias correction term for the bias in the error on the layer’s output,
|
121
122
|
that is introduced by the weights quantization.
|
@@ -154,14 +155,14 @@ def _compute_bias_correction(kernel: np.ndarray,
|
|
154
155
|
|
155
156
|
# Sanity validation
|
156
157
|
if is_non_positive_integer(num_groups) or is_non_positive_integer(num_out_channels / num_groups):
|
157
|
-
Logger.warning("Skipping bias correction due to
|
158
|
+
Logger.warning(f"Skipping bias correction due to validation problem in node {node_name}.")
|
158
159
|
return correction_term
|
159
160
|
|
160
161
|
num_out_channels_per_group = int(num_out_channels / num_groups)
|
161
162
|
|
162
163
|
# In Pytorch the output of group conv is separated into respective groups is
|
163
164
|
# viewed as follows: (batch, channel, ngroups, h, w),
|
164
|
-
# i.e each group is consistently viewed one after the other
|
165
|
+
# i.e. each group is consistently viewed one after the other
|
165
166
|
# For an example, check out: https://discuss.pytorch.org/t/group-convolution-output-order/88258
|
166
167
|
mu_split = np.split(mu, num_groups)
|
167
168
|
eps_split = np.split(eps, num_groups, 0)
|
@@ -205,5 +206,6 @@ def _get_bias_correction_term_of_node(input_channels_axis: int,
|
|
205
206
|
quantized_kernel,
|
206
207
|
node_in_stats_collector,
|
207
208
|
output_channels_axis,
|
208
|
-
input_channels_axis
|
209
|
+
input_channels_axis,
|
210
|
+
n.name)
|
209
211
|
return correction
|
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/distance_weighting.py
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
# ==============================================================================
|
15
|
-
from enum import Enum
|
16
|
-
from functools import partial
|
17
|
-
|
18
|
-
import numpy as np
|
19
|
-
|
20
|
-
|
21
|
-
def get_average_weights(distance_matrix: np.ndarray) -> np.ndarray:
|
22
|
-
"""
|
23
|
-
Get weights for weighting the sensitivity among different layers when evaluating MP configurations on
|
24
|
-
model's sensitivity. This function returns equal weights for each layer, such that the sensitivity
|
25
|
-
is averaged over all layers.
|
26
|
-
|
27
|
-
Args:
|
28
|
-
distance_matrix: Numpy array at shape (L,M): L -number of interest points, M number of samples.
|
29
|
-
The matrix contain the distance for each interest point at each sample.
|
30
|
-
|
31
|
-
Returns:
|
32
|
-
Numpy array containing equal weights for sensitivity weighting.
|
33
|
-
"""
|
34
|
-
|
35
|
-
num_nodes = len(distance_matrix)
|
36
|
-
return np.asarray([1 / num_nodes for _ in range(num_nodes)])
|
37
|
-
|
38
|
-
|
39
|
-
def get_last_layer_weights(distance_matrix: np.ndarray) -> np.ndarray:
|
40
|
-
"""
|
41
|
-
Get weights for weighting the sensitivity among different layers when evaluating MP configurations on
|
42
|
-
model's sensitivity. This function returns weights for each layer, such that the sensitivity
|
43
|
-
is computed using only the last layer of the model (all other weights are zero).
|
44
|
-
|
45
|
-
Args:
|
46
|
-
distance_matrix: Numpy array at shape (L,M): L -number of interest points, M number of samples.
|
47
|
-
The matrix contain the distance for each interest point at each sample.
|
48
|
-
|
49
|
-
Returns:
|
50
|
-
Numpy array containing weights for sensitivity weighting (all zero but the last one).
|
51
|
-
"""
|
52
|
-
num_nodes = len(distance_matrix)
|
53
|
-
w = np.asarray([0 for _ in range(num_nodes)])
|
54
|
-
w[-1] = 1
|
55
|
-
return w
|
56
|
-
|
57
|
-
|
58
|
-
class MpDistanceWeighting(Enum):
|
59
|
-
"""
|
60
|
-
Defines mixed precision distance metric weighting methods.
|
61
|
-
The enum values can be used to call a function on a set of arguments and key-arguments.
|
62
|
-
|
63
|
-
AVG - take the average distance on all computed layers.
|
64
|
-
|
65
|
-
LAST_LAYER - take only the distance of the last layer output.
|
66
|
-
|
67
|
-
"""
|
68
|
-
|
69
|
-
AVG = partial(get_average_weights)
|
70
|
-
LAST_LAYER = partial(get_last_layer_weights)
|
71
|
-
|
72
|
-
def __call__(self, distance_matrix: np.ndarray) -> np.ndarray:
|
73
|
-
return self.value(distance_matrix)
|
74
|
-
|
75
|
-
def __deepcopy__(self, memo):
|
76
|
-
return self
|
File without changes
|
File without changes
|
{mct_nightly-2.3.0.20250602.610.dist-info → mct_nightly-2.3.0.20250604.611.dist-info}/top_level.txt
RENAMED
File without changes
|