mct-nightly 2.3.0.20250603.606-py3-none-any.whl → 2.3.0.20250604.611-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mct_nightly-2.3.0.20250603.606.dist-info/METADATA → mct_nightly-2.3.0.20250604.611.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mct-nightly
-Version: 2.3.0.20250603.606
+Version: 2.3.0.20250604.611
 Summary: A Model Compression Toolkit for neural networks
 Author-email: ssi-dnn-dev@sony.com
 Classifier: Programming Language :: Python :: 3
@@ -236,6 +236,6 @@ MCT is licensed under Apache License Version 2.0. By contributing to the project
 
 [3] [TORCHVISION.MODELS](https://pytorch.org/vision/stable/models.html)
 
-[4] Gordon, O., Cohen, E., Habi, H. V., & Netzer, A., 2024. [EPTQ: Enhanced Post-Training Quantization via Hessian-guided Network-wise Optimization, European Conference on Computer Vision Workshop 2024, Computational Aspects of Deep Learning (CADL)](https://arxiv.org/abs/2309.11531)
+[4] Gordon, O., Cohen, E., Habi, H.V., Netzer, A. (2025). [EPTQ: Enhanced Post-Training Quantization via Hessian-guided Network-wise Optimization ECCV 2024 Workshops](https://link.springer.com/chapter/10.1007/978-3-031-91979-4_13)
 
 [5] Dikstein, L., Lapid, A., Netzer, A., & Habi, H. V., 2024. [Data Generation for Hardware-Friendly Post-Training Quantization, Accepted to IEEE/CVF Winter Conference on Applications of Computer Vision (WACV) 2025](https://openaccess.thecvf.com/content/WACV2025/papers/Dikstein_Data_Generation_for_Hardware-Friendly_Post-Training_Quantization_WACV_2025_paper.pdf)
mct_nightly-2.3.0.20250603.606.dist-info/RECORD → mct_nightly-2.3.0.20250604.611.dist-info/RECORD
@@ -1,11 +1,11 @@
-mct_nightly-2.3.0.20250603.606.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-model_compression_toolkit/__init__.py,sha256=IAmvbE3KF66i3EYBjVG_26WsspUM8gYHQemlA1TKkTI,1557
+mct_nightly-2.3.0.20250604.611.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+model_compression_toolkit/__init__.py,sha256=MNjhJRHWg20p0VjHTzI6AhnDetVQ7Oy8YQrAOLRA-4Y,1557
 model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
 model_compression_toolkit/metadata.py,sha256=x_Bk4VpzILdsFax6--CZ3X18qUTP28sbF_AhoQW8dNc,4003
 model_compression_toolkit/verify_packages.py,sha256=l0neIRr8q_QwxmuiTI4vyCMDISDedK0EihjEQUe66tE,1319
-model_compression_toolkit/core/__init__.py,sha256=phfdtc09uruSyOpWRaUMUeMNRSwYB5q9NBus3cqcjIM,2113
+model_compression_toolkit/core/__init__.py,sha256=HNverPpoqEyFKTa7iEdOqqY2P0Gq-7GMejNOi6ZPcQs,2042
 model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
 model_compression_toolkit/core/graph_prep_runner.py,sha256=C6eUTd-fcgxk0LUbt51gFZwmyDDDEB8-9Q4kr9ujYvI,11555
 model_compression_toolkit/core/quantization_prep_runner.py,sha256=DPevqQ8brkdut8K5f5v9g5lbT3r1GSmhLAk3NkL40Fg,6593
@@ -60,15 +60,15 @@ model_compression_toolkit/core/common/matchers/edge_matcher.py,sha256=bS9KIBhB6Y
 model_compression_toolkit/core/common/matchers/function.py,sha256=kMwcinxn_PInvetNh_L_lqGXT1hoi3f97PqBpjqfXoA,1773
 model_compression_toolkit/core/common/matchers/node_matcher.py,sha256=63cMwa5YbQ5LKZy8-KFmdchVc3N7mpDJ6fNDt_uAQsk,2745
 model_compression_toolkit/core/common/matchers/walk_matcher.py,sha256=xqfLKk6xZt72hSnND_HoX5ESOooNMypb5VOZkVsJ_nw,1111
-model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=Jm6pls3QUCMQ9d86KOYxOq05br_k130ByGHLCojIZ_M,766
+model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=Vlpo9M_1u6LHdEjYE3-wGc1esoH2NVhRzi3n_HTYvHs,789
 model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256=npqLPyk5xXR11M_zdImtSALc5vJv9N4fEapaludKLBw,7139
 model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,sha256=LLDguK7afsbN742ucLpmJr5TUfTyFpK1vbf2bpVr1v0,882
 model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=7dKMi5S0zQZ16m8NWn1XIuoXsKuZUg64G4-uK8-j1PQ,5177
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=6pLUEEIqRTVIlCYQC4JIvY55KAvuBHEX8uTOQ-1Ac4Q,3859
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=rdtxPmRhjrC160O3fqAjDzGxpMeM49hYhmlnf_Kwqds,5416
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=3vuhlpGvUdeGpEMfNIYONMN9NQ_VFIJykC6Gwlo-gaI,6728
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=axgAypzsiCOw04ZOtOEjK4riuNsaEU2qU6KkWnEXtMo,4951
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=1877xOUdgpWrXWyhdX1pJOePuopq43L71WqBFMqzyR4,6418
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=TAxA9BKxINwUQfJpmf2Qghz-5DTbesuf1Pe1L0Tc-j4,28157
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=3D_gHdcxsuINiTZqEAJXbxwYDg5qqXD51k4_smmWI9M,28553
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=MY8df-c_kITEr_7hOctaxhdiq29hSTA0La9Qo0oTJJY,9678
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=PKkhc5q8pEPnNLXwo3U56EOCfYnPXIvPs0LlCGZOoKU,4426
@@ -77,8 +77,7 @@ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
 model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=6Z6nQL9UH7B8dbcUR0cuCTEYFOKZAlvOb-SCk_cAZFA,6670
 model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/__init__.py,sha256=5yxITHNJcCfeGKdIpAYbNbKDoXUSvENuRQm3OQu8Qf4,697
-model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
-model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py,sha256=W4CySFtN874npcM9j9wu1PVrv7IZHLyKdLOPrTsCNQg,22209
+model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py,sha256=WhSsQDsmunfYlAU34-YUYnQXEiPBFf2Gl4FnyCn-3LE,21966
 model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py,sha256=5l0qP0mZ061xh3rjqTJZcLD2mMKC-hfSnNAN0OmSusk,8938
 model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/set_layer_to_bitwidth.py,sha256=Zn6SgzGLWWKmuYGHd1YtKxZdYnQWRDeXEkKlBiTbHcs,2929
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
@@ -135,7 +134,7 @@ model_compression_toolkit/core/common/statistics_correction/apply_activation_bia
 model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py,sha256=b5clhUWGoDaQLn2pDCeYkV0FomVebcKS8pMXtQTTzIg,4679
 model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py,sha256=C_nwhhitTd1pCto0nHZPn3fjIMOeDD7VIciumTR3s6k,5641
 model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py,sha256=zIkhOPF6K5aIgMExpD7HFT9UZSDpvXh51F6V-qZ7H-4,9048
-model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py,sha256=LaGhYES7HgIDf9Bi2KAG_mBzAWuum0J6AGmAFPC8wwo,10478
+model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py,sha256=Jditohcz1mGEYZTqNlzNyoRMsjZx0hsz1gkN1xux_HI,10596
 model_compression_toolkit/core/common/statistics_correction/statistics_correction.py,sha256=E0ZA4edimJwpHh9twI5gafcoJ9fX5F1JX2QUOkUOKEw,6250
 model_compression_toolkit/core/common/substitutions/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
 model_compression_toolkit/core/common/substitutions/apply_substitutions.py,sha256=k-bifmakHIYZeZS-4T1QpZ1Et6AwAijMRgAKs7hmMKc,1390
@@ -530,7 +529,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.3.0.20250603.606.dist-info/METADATA,sha256=uaVhWdhpN3_zGvI8KT4dcZDayQIVwhN-_tHBip8cqg8,25136
-mct_nightly-2.3.0.20250603.606.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-mct_nightly-2.3.0.20250603.606.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.3.0.20250603.606.dist-info/RECORD,,
+mct_nightly-2.3.0.20250604.611.dist-info/METADATA,sha256=vtgFVscrMOf1owGEtgvDKYIE2oCQ2ewOg0-JbeDPI0k,25087
+mct_nightly-2.3.0.20250604.611.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+mct_nightly-2.3.0.20250604.611.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.3.0.20250604.611.dist-info/RECORD,,
model_compression_toolkit/__init__.py
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.3.0.20250603.000606"
+__version__ = "2.3.0.20250604.000611"

model_compression_toolkit/core/__init__.py
@@ -22,8 +22,8 @@ from model_compression_toolkit.core.common.quantization.quantization_config impo
 from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import (
+    MixedPrecisionQuantizationConfig, MpDistanceWeighting, MpMetricNormalization)
 from model_compression_toolkit.core.keras.resource_utilization_data_facade import keras_resource_utilization_data
 from model_compression_toolkit.core.pytorch.resource_utilization_data_facade import pytorch_resource_utilization_data
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.distance_weighting import MpDistanceWeighting
 
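Note: for users upgrading across this change, the enums move out of the deleted sensitivity_eval.distance_weighting module and are re-exported from the package's core namespace. A minimal import sketch, assuming only the re-exports visible in the hunk above:

    from model_compression_toolkit.core import (
        MixedPrecisionQuantizationConfig,  # unchanged location
        MpDistanceWeighting,               # now defined in mixed_precision_quantization_config
        MpMetricNormalization,             # newly exposed alongside it
    )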
model_compression_toolkit/core/common/mixed_precision/__init__.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from .sensitivity_eval.distance_weighting import MpDistanceWeighting
+from .mixed_precision_quantization_config import MpDistanceWeighting, MpMetricNormalization
model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py
@@ -14,10 +14,25 @@
 # ==============================================================================
 
 from dataclasses import dataclass, field
-from enum import Enum
+from enum import Enum, auto
 from typing import List, Callable, Optional
+
 from model_compression_toolkit.constants import MP_DEFAULT_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.distance_weighting import MpDistanceWeighting
+
+
+class MpDistanceWeighting(Enum):
+    """
+    Defines interest points distances weighting methods.
+
+    AVG - take the average distance over all interest points.
+    LAST_LAYER - take only the distance of the last interest point.
+    EXP - weighted average with weights based on exponent of negative distances between activations of the quantized and the float models.
+    HESSIAN - weighted average with Hessians as weights.
+    """
+    AVG = auto()
+    LAST_LAYER = auto()
+    EXP = auto()
+    HESSIAN = auto()
 
 
 class MpMetricNormalization(Enum):
@@ -38,11 +53,12 @@ class MixedPrecisionQuantizationConfig:
 
     Args:
         compute_distance_fn (Callable): Function to compute a distance between two tensors. If None, using pre-defined distance methods based on the layer type for each layer.
-        distance_weighting_method (MpDistanceWeighting): MpDistanceWeighting enum value that provides a function to use when weighting the distances among different layers when computing the sensitivity metric.
+        distance_weighting_method (MpDistanceWeighting): distance weighting method to use. By default, MpDistanceWeighting.AVG.
         num_of_images (int): Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.
         configuration_overwrite (List[int]): A list of integers that enables overwrite of mixed precision with a predefined one.
        num_interest_points_factor (float): A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.
-        use_hessian_based_scores (bool): Whether to use Hessian-based scores for weighted average distance metric computation.
+        use_hessian_based_scores (bool): Whether to use Hessian-based scores for weighted average distance metric
+            computation. This is identical to passing distance_weighting_method=MpDistanceWeighting.HESSIAN.
         norm_scores (bool): Whether to normalize the returned scores for the weighted distance metric (to get values between 0 and 1).
         refine_mp_solution (bool): Whether to try to improve the final mixed-precision configuration using a greedy algorithm that searches layers to increase their bit-width, or not.
         metric_normalization_threshold (float): A threshold for checking the mixed precision distance metric values, In case of values larger than this threshold, the metric will be scaled to prevent numerical issues.
@@ -51,12 +67,14 @@ class MixedPrecisionQuantizationConfig:
         metric_epsilon (float | None): ensure minimal distance between the metric for any non-max-bidwidth candidate
             and a max-bitwidth candidate, i.e. metric(non-max-bitwidth) >= metric(max-bitwidth) + epsilon.
             If none, the computed metrics are used as is.
+        exp_distance_weighting_sigma (float): sigma for exponential weighting method. A distance for each interest point
+            is normalized by sigma prior to applying exponent.
         custom_metric_fn (Callable): Function to compute a custom metric. As input gets the model_mp and returns a
             float value for metric. If None, uses interest point metric.
 
     """
     compute_distance_fn: Optional[Callable] = None
-    distance_weighting_method: MpDistanceWeighting = MpDistanceWeighting.AVG
+    distance_weighting_method: MpDistanceWeighting = None
     num_of_images: int = MP_DEFAULT_NUM_SAMPLES
     configuration_overwrite: Optional[List[int]] = None
     num_interest_points_factor: float = field(default=1.0, metadata={"description": "Should be between 0.0 and 1.0"})
@@ -67,6 +85,7 @@ class MixedPrecisionQuantizationConfig:
     hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
     metric_normalization: MpMetricNormalization = MpMetricNormalization.NONE
     metric_epsilon: Optional[float] = 1e-6
+    exp_distance_weighting_sigma: float = 0.1
     custom_metric_fn: Optional[Callable] = None
     _is_mixed_precision_enabled: bool = field(init=False, default=False)
 
@@ -77,6 +96,14 @@ class MixedPrecisionQuantizationConfig:
                                                              "the base set of interest points that are required to be " \
                                                              "used for mixed-precision metric evaluation, " \
                                                              "thus, it should be between 0 to 1"
+        if self.use_hessian_based_scores:
+            assert self.distance_weighting_method in [None, MpDistanceWeighting.HESSIAN], \
+                f'Distance method {self.distance_weighting_method} is incompatible with use_hessian_based_scores=True'
+            self.distance_weighting_method = MpDistanceWeighting.HESSIAN
+        elif self.distance_weighting_method is None and self.custom_metric_fn is None:
+            self.distance_weighting_method = MpDistanceWeighting.AVG
+        assert self.exp_distance_weighting_sigma > 0, (f'exp_distance_weighting_sigma should be positive, but got '
+                                                       f'{self.exp_distance_weighting_sigma}')
 
     def set_mixed_precision_enable(self):
         """
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -171,8 +171,6 @@ class MixedPrecisionSearchManager:
             else:  # pragma: no cover
                 raise ValueError(f'Unexpected MpMetricNormalization mode {norm_method}')
             normalized_metrics = node_candidates_metrics / node_candidates_metrics[ref_ind]
-            if verbose and not np.array_equal(normalized_metrics, node_candidates_metrics):
-                print(f'{"normalized metric:":25}', candidates_sensitivity)
             return normalized_metrics
 
         def ensure_maxbit_minimal_metric(node_candidates_metrics, max_ind):
@@ -183,30 +181,41 @@ class MixedPrecisionSearchManager:
             max_val = node_candidates_metrics[max_ind]
             metrics = np.maximum(node_candidates_metrics, max_val + eps)
             metrics[max_ind] = max_val
-            if verbose and not np.array_equal(metrics, node_candidates_metrics):
-                print(f'{"eps-adjusted metric:":25}', candidates_sensitivity)
             return metrics
 
-        layer_to_metrics_mapping = defaultdict(list)
+        layer_to_metrics_mapping = {}
+        debug_mapping = {}
         for node_idx, node in tqdm(enumerate(self.mp_topo_configurable_nodes)):
-            candidates_sensitivity = np.empty(len(node.candidates_quantization_cfg))
+            raw_candidates_sensitivity = np.empty(len(node.candidates_quantization_cfg))
             for bitwidth_idx, _ in enumerate(node.candidates_quantization_cfg):
                 if self.using_virtual_graph:
                     a_cfg, w_cfg = self.config_reconstructor.reconstruct_separate_aw_configs({node: bitwidth_idx})
                 else:
                     a_cfg = {node: bitwidth_idx} if node.has_configurable_activation() else {}
                     w_cfg = {node: bitwidth_idx} if node.has_any_configurable_weight() else {}
-                candidates_sensitivity[bitwidth_idx] = self.sensitivity_evaluator.compute_metric(
+                raw_candidates_sensitivity[bitwidth_idx] = self.sensitivity_evaluator.compute_metric(
                     mp_a_cfg={n.name: ind for n, ind in a_cfg.items()},
                     mp_w_cfg={n.name: ind for n, ind in w_cfg.items()}
                 )
-            if verbose:
-                print(f'{node.name}\n{"raw metric:":25}', candidates_sensitivity)
             max_ind = node.find_max_candidate_index()
-            candidates_sensitivity = normalize(candidates_sensitivity, max_ind)
-            candidates_sensitivity = ensure_maxbit_minimal_metric(candidates_sensitivity, max_ind)
+            normalized_sensitivity = normalize(raw_candidates_sensitivity, max_ind)
+            candidates_sensitivity = ensure_maxbit_minimal_metric(normalized_sensitivity, max_ind)
             layer_to_metrics_mapping[node] = candidates_sensitivity
 
+            if verbose:  # pragma: no cover
+                debug_mapping[node] = {'': candidates_sensitivity}
+                if np.any(raw_candidates_sensitivity != candidates_sensitivity):
+                    debug_mapping[node]['normalized'] = normalized_sensitivity
+                    debug_mapping[node]['raw '] = raw_candidates_sensitivity
+
+        if verbose:  # pragma: no cover
+            np.set_printoptions(precision=8, floatmode='maxprec')
+            name_len = max(len(n.name) for n in debug_mapping)
+            s = '\nMETRIC BEGIN'
+            for n, d in debug_mapping.items():
+                s += (f'\n{n.name:{name_len}}' + f'\n{" ":{name_len-10}}'.join([f'{k} {v}' for k, v in d.items()]))
+            s += '\nMETRIC END'
+            Logger.info(s)
         # Finalize distance metric mapping
         self._finalize_distance_metric(layer_to_metrics_mapping)
 
@@ -372,7 +381,7 @@ class ConfigReconstructionHelper:
             w_candidates = orig_nodes_w_candidates[orig_node]
             # find the common candidate
             common_candidates = set(a_candidates).intersection(set(w_candidates))
-            if len(common_candidates) != 1:
+            if len(common_candidates) != 1:  # pragma: no cover
                 raise ValueError(f'Expected to find exactly one candidate with the required activation and weights '
                                  f'quantization configuration for node {orig_node}. Found {len(common_candidates)}')
             # in theory it's possible that original non-configurable node gets split and each part is combined
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py
@@ -15,7 +15,7 @@
 import numpy as np
 from typing import runtime_checkable, Protocol, Callable, Any, List, Tuple
 
-from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, FrameworkInfo
+from model_compression_toolkit.core import FrameworkInfo, MixedPrecisionQuantizationConfig, MpDistanceWeighting
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
     HessianScoresGranularity
@@ -32,7 +32,7 @@ class MetricCalculator(Protocol):
 
     def compute(self, mp_model) -> float:
         """ Compute the metric for the given model. """
-        ...
+        raise NotImplementedError  # pragma: no cover
 
 
 class CustomMetricCalculator(MetricCalculator):
@@ -66,16 +66,6 @@ class DistanceMetricCalculator(MetricCalculator):
                  fw_impl: Any,
                  hessian_info_service: HessianInfoService = None):
         """
-        Initiates all relevant objects to manage a sensitivity evaluation for MP search.
-        Create an object that allows to compute the sensitivity metric of an MP model (the sensitivity
-        is computed based on the similarity of the interest points' outputs between the MP model
-        and the float model).
-        First, we initiate a SensitivityEvaluationManager that handles the components which are necessary for
-        evaluating the sensitivity. It initializes an MP model (a model where layers that can be configured in
-        different bit-widths) and a baseline model (a float model).
-        Then, and based on the outputs of these two models (for some batches from the representative_data_gen),
-        we build a function to measure the sensitivity of a change in a bit-width of a model's layer.
-
         Args:
             graph: Graph to search for its MP configuration.
             mp_config: MP Quantization configuration for how the graph should be quantized.
@@ -91,11 +81,9 @@ class DistanceMetricCalculator(MetricCalculator):
         self.fw_info = fw_info
         self.fw_impl = fw_impl
 
-        if self.mp_config.use_hessian_based_scores:
-            if not isinstance(hessian_info_service, HessianInfoService):  # pragma: no cover
-                Logger.critical(
-                    f"When using Hessian-based approximations for sensitivity evaluation, a valid HessianInfoService object is required; found {type(hessian_info_service)}.")
-        self.hessian_info_service = hessian_info_service
+        if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN:
+            assert hessian_info_service is not None, ('Expected HessianInfoService object to be passed with Hessian '
+                                                      'distance weighting')
 
         self.sorted_configurable_nodes_names = graph.get_configurable_sorted_nodes_names(self.fw_info)
 
@@ -104,17 +92,12 @@ class DistanceMetricCalculator(MetricCalculator):
         self.interest_points = self.get_mp_interest_points(graph,
                                                            fw_impl.count_node_for_mixed_precision_interest_points,
                                                            mp_config.num_interest_points_factor)
-
-        # We use normalized MSE when not running hessian-based. For Hessian-based normalized MSE is not needed
-        # because hessian weights already do normalization.
-        use_normalized_mse = self.mp_config.use_hessian_based_scores is False
-        self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points,
-                                                                              use_normalized_mse)
-
+        # exponential weighing assumes normalized distances, we only store it to be able to catch any changes
+        self.use_normalized_mse = True
+        self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points)
         output_points = self.get_output_nodes_for_metric(graph)
         self.all_interest_points = self.interest_points + output_points
-        self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(output_points,
-                                                                                    use_normalized_mse)
+        self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(output_points)
 
         self.ref_model, _ = fw_impl.model_builder(graph, mode=ModelBuilderMode.FLOAT,
                                                   append2output=self.all_interest_points)
@@ -135,12 +118,10 @@ class DistanceMetricCalculator(MetricCalculator):
         # Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
         self.baseline_tensors_list = self._init_baseline_tensors_list()
 
-        # Computing Hessian-based scores for weighted average distance metric computation (only if requested),
-        # and assigning distance_weighting method accordingly.
+        # Hessian-based scores for weighted average distance metric computation
         self.interest_points_hessians = None
-        if self.mp_config.use_hessian_based_scores is True:
-            self.interest_points_hessians = self._compute_hessian_based_scores()
-            self.mp_config.distance_weighting_method = lambda d: self.interest_points_hessians
+        if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN:
+            self.interest_points_hessians = self._compute_hessian_based_scores(hessian_info_service)
 
     def compute(self, mp_model) -> float:
         """
@@ -153,13 +134,11 @@ class DistanceMetricCalculator(MetricCalculator):
             Computed metric.
         """
         ipts_distances, out_pts_distances = self._compute_distance(mp_model)
-        sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
-                                                               self.mp_config.distance_weighting_method)
+        sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances)
         return sensitivity_metric
 
     def _init_metric_points_lists(self,
-                                  points: List[BaseNode],
-                                  norm_mse: bool = False) -> Tuple[List[Callable], List[int]]:
+                                  points: List[BaseNode]) -> Tuple[List[Callable], List[int]]:
         """
         Initiates required lists for future use when computing the sensitivity metric.
         Each point on which the metric is computed uses a dedicated distance function based on its type.
@@ -167,7 +146,6 @@ class DistanceMetricCalculator(MetricCalculator):
 
         Args:
             points: The set of nodes in the graph for which we need to initiate the lists.
-            norm_mse: whether to normalize mse distance function.
 
         Returns: A lists with distance functions and an axis list for each node.
 
@@ -177,7 +155,7 @@ class DistanceMetricCalculator(MetricCalculator):
         for n in points:
             distance_fn, axis = self.fw_impl.get_mp_node_distance_fn(n,
                                                                      compute_distance_fn=self.mp_config.compute_distance_fn,
-                                                                     norm_mse=norm_mse)
+                                                                     norm_mse=self.use_normalized_mse)
             distance_fns_list.append(distance_fn)
             # Axis is needed only for KL Divergence calculation, otherwise we use per-tensor computation
             axis_list.append(axis if distance_fn == compute_kl_divergence else None)
@@ -190,12 +168,14 @@ class DistanceMetricCalculator(MetricCalculator):
         return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.ref_model, images))
                 for images in self.images_batches]
 
-    def _compute_hessian_based_scores(self) -> np.ndarray:
+    def _compute_hessian_based_scores(self, hessian_info_service: HessianInfoService) -> np.ndarray:
         """
         Compute Hessian-based scores for each interest point.
+        Args:
+            hessian_info_service: Hessian service.
 
-        Returns: A vector of scores, one for each interest point,
-        to be used for the distance metric weighted average computation.
+        Returns:
+            A vector of scores, one for each interest point, to be used for the distance metric weighted average computation.
 
         """
         # Create a request for Hessian approximation scores with specific configurations
@@ -209,12 +189,16 @@ class DistanceMetricCalculator(MetricCalculator):
                                                      n_samples=self.mp_config.num_of_images)
 
         # Fetch the Hessian approximation scores for the current interest point
-        nodes_approximations = self.hessian_info_service.fetch_hessian(request=hessian_info_request)
+        nodes_approximations = hessian_info_service.fetch_hessian(request=hessian_info_request)
         approx_by_image = np.stack([nodes_approximations[n.name] for n in self.interest_points],
                                    axis=1)  # samples X nodes
 
         # Return the mean approximation value across all images for each interest point
-        return np.mean(approx_by_image, axis=0)
+        scores = np.mean(approx_by_image, axis=0)
+        if scores.ndim == 2 and scores.shape[1] == 1:
+            scores = np.squeeze(scores, 1)
+        assert scores.ndim == 1, f'Expected a vector of hessians, got tensor of shape {scores.shape}'
+        return scores
 
     def _compute_points_distance(self,
                                  baseline_tensors: List[Any],
@@ -282,39 +266,58 @@ class DistanceMetricCalculator(MetricCalculator):
 
         return ipts_distances, out_pts_distances
 
-    @staticmethod
-    def _compute_mp_distance_measure(ipts_distances: np.ndarray,
-                                     out_pts_distances: np.ndarray,
-                                     metrics_weights_fn: Callable) -> float:
+    def _compute_mp_distance_measure(self, ipts_distances: np.ndarray, out_pts_distances: np.ndarray) -> float:
         """
         Computes the final distance value out of a distance matrix.
 
         Args:
-            ipts_distances: A matrix that contains the distances between the baseline and MP models
-            for each interest point.
-            out_pts_distances: A matrix that contains the distances between the baseline and MP models
-            for each output point.
-            metrics_weights_fn: A callable that produces the scores to compute weighted distance for interest points.
+            ipts_distances: A matrix that contains the distances between the reference and MP models
+                for each interest point, of shape (num interest points, num samples,).
+            out_pts_distances: A matrix that contains the distances between the reference and MP models
+                for each output point, of shape (num output points, num samples,).
 
-        Returns: Distance value.
+        Returns:
+            Distance value.
         """
-        mean_ipts_distance = 0
-        if len(ipts_distances) > 0:
-            mean_distance_per_layer = ipts_distances.mean(axis=1)
+        assert ipts_distances.size + out_pts_distances.size, 'Both interest and output points distances are empty.'
 
-            # Use weights such that every layer's distance is weighted differently (possibly).
-            weight_scores = metrics_weights_fn(ipts_distances)
-            weight_scores = np.asarray(weight_scores) if isinstance(weight_scores, List) else weight_scores
-            weight_scores = weight_scores.flatten()
+        ipts_metric = self._compute_ipts_distance_measure(ipts_distances) if ipts_distances.size else 0
 
-            mean_ipts_distance = np.average(mean_distance_per_layer, weights=weight_scores)
+        out_pts_metric = out_pts_distances.mean() if out_pts_distances.size else 0
 
-        mean_output_distance = 0
-        if len(out_pts_distances) > 0:
-            mean_distance_per_output = out_pts_distances.mean(axis=1)
-            mean_output_distance = np.average(mean_distance_per_output)
+        return ipts_metric + out_pts_metric
 
-        return mean_output_distance + mean_ipts_distance
+    def _compute_ipts_distance_measure(self, ipts_distances: np.ndarray) -> float:
+        """
+        Compute distance measure for interest points.
+
+        Args:
+            ipts_distances: a matrix of shape (num interest points, num samples,).
+
+        Returns:
+            Distance measure.
+        """
+        assert ipts_distances.ndim == 2, (f'Expected ipts_distances of shape shape (num interest points, num samples), '
+                                          f'got {ipts_distances.shape}')
+        method = self.mp_config.distance_weighting_method
+        if method == MpDistanceWeighting.AVG:
+            return ipts_distances.mean()
+        if method == MpDistanceWeighting.LAST_LAYER:
+            return ipts_distances[-1, :].mean()
+        if method == MpDistanceWeighting.HESSIAN:
+            return np.average(ipts_distances.mean(axis=1), weights=self.interest_points_hessians)
+        if method == MpDistanceWeighting.EXP:
+            assert self.use_normalized_mse
+            ipts_mean_distances = ipts_distances.mean(axis=1)
+            weights = 1 - np.exp(-ipts_mean_distances / self.mp_config.exp_distance_weighting_sigma)
+            if np.any(weights):
+                return np.average(ipts_mean_distances, weights=weights)
+            else:
+                Logger.warning('All weights for interest points are 0. If distances are very small, you might need to '
                               'pass a smaller exp_distance_weighting_sigma.')
+                return 0
+
+        raise ValueError(f'Unexpected MpDistanceWeighting {method}')  # pragma: no cover
 
     def _get_images_batches(self, num_of_images: int) -> List[Any]:
         """
model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py
@@ -115,7 +115,8 @@ def _compute_bias_correction(kernel: np.ndarray,
                              quantized_kernel: np.ndarray,
                              in_statistics_container: BaseStatsCollector,
                              output_channels_axis: int,
-                             input_channels_axis: int) -> Any:
+                             input_channels_axis: int,
+                             node_name: str) -> Any:
     """
     Compute the bias correction term for the bias in the error on the layer's output,
     that is introduced by the weights quantization.
@@ -154,14 +155,14 @@ def _compute_bias_correction(kernel: np.ndarray,
 
     # Sanity validation
     if is_non_positive_integer(num_groups) or is_non_positive_integer(num_out_channels / num_groups):
-        Logger.warning("Skipping bias correction due to valiation problem.")
+        Logger.warning(f"Skipping bias correction due to validation problem in node {node_name}.")
         return correction_term
 
     num_out_channels_per_group = int(num_out_channels / num_groups)
 
     # In Pytorch the output of group conv is separated into respective groups is
     # viewed as follows: (batch, channel, ngroups, h, w),
-    # i.e each group is consistently viewed one after the other
+    # i.e. each group is consistently viewed one after the other
     # For an example, check out: https://discuss.pytorch.org/t/group-convolution-output-order/88258
     mu_split = np.split(mu, num_groups)
     eps_split = np.split(eps, num_groups, 0)
@@ -205,5 +206,6 @@ def _get_bias_correction_term_of_node(input_channels_axis: int,
                                                  quantized_kernel,
                                                  node_in_stats_collector,
                                                  output_channels_axis,
-                                                 input_channels_axis)
+                                                 input_channels_axis,
+                                                 n.name)
     return correction
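Note: the np.split calls in the bias-correction hunk above slice the per-channel statistics into contiguous per-group blocks, matching PyTorch's grouped-convolution channel ordering. A toy illustration with hypothetical sizes:

    import numpy as np

    num_groups = 2
    mu = np.arange(6.0)                  # per-channel statistics for 6 channels
    mu_split = np.split(mu, num_groups)
    # -> [array([0., 1., 2.]), array([3., 4., 5.])]; group g owns channels [3g, 3g+3)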
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/distance_weighting.py (file removed)
@@ -1,76 +0,0 @@
-# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from enum import Enum
-from functools import partial
-
-import numpy as np
-
-
-def get_average_weights(distance_matrix: np.ndarray) -> np.ndarray:
-    """
-    Get weights for weighting the sensitivity among different layers when evaluating MP configurations on
-    model's sensitivity. This function returns equal weights for each layer, such that the sensitivity
-    is averaged over all layers.
-
-    Args:
-        distance_matrix: Numpy array at shape (L,M): L -number of interest points, M number of samples.
-            The matrix contain the distance for each interest point at each sample.
-
-    Returns:
-        Numpy array containing equal weights for sensitivity weighting.
-    """
-
-    num_nodes = len(distance_matrix)
-    return np.asarray([1 / num_nodes for _ in range(num_nodes)])
-
-
-def get_last_layer_weights(distance_matrix: np.ndarray) -> np.ndarray:
-    """
-    Get weights for weighting the sensitivity among different layers when evaluating MP configurations on
-    model's sensitivity. This function returns weights for each layer, such that the sensitivity
-    is computed using only the last layer of the model (all other weights are zero).
-
-    Args:
-        distance_matrix: Numpy array at shape (L,M): L -number of interest points, M number of samples.
-            The matrix contain the distance for each interest point at each sample.
-
-    Returns:
-        Numpy array containing weights for sensitivity weighting (all zero but the last one).
-    """
-    num_nodes = len(distance_matrix)
-    w = np.asarray([0 for _ in range(num_nodes)])
-    w[-1] = 1
-    return w
-
-
-class MpDistanceWeighting(Enum):
-    """
-    Defines mixed precision distance metric weighting methods.
-    The enum values can be used to call a function on a set of arguments and key-arguments.
-
-    AVG - take the average distance on all computed layers.
-
-    LAST_LAYER - take only the distance of the last layer output.
-
-    """
-
-    AVG = partial(get_average_weights)
-    LAST_LAYER = partial(get_last_layer_weights)
-
-    def __call__(self, distance_matrix: np.ndarray) -> np.ndarray:
-        return self.value(distance_matrix)
-
-    def __deepcopy__(self, memo):
-        return self
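Note: removing this module loses no behavior. The old callable enum produced per-layer weight vectors that DistanceMetricCalculator now applies directly in _compute_ipts_distance_measure. A small equivalence sketch over a toy distance matrix:

    import numpy as np

    d = np.random.rand(4, 8)  # (interest points, samples)

    # get_average_weights gave equal weights 1/L, i.e. a plain mean:
    assert np.isclose(np.average(d.mean(axis=1), weights=np.full(4, 0.25)), d.mean())

    # get_last_layer_weights was one-hot on the last point:
    assert np.isclose(np.average(d.mean(axis=1), weights=np.array([0., 0., 0., 1.])),
                      d[-1, :].mean())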