mct-nightly 2.3.0.20250428.605-py3-none-any.whl → 2.3.0.20250430.538-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mct-nightly
- Version: 2.3.0.20250428.605
+ Version: 2.3.0.20250430.538
  Summary: A Model Compression Toolkit for neural networks
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: Apache Software License
@@ -51,7 +51,7 @@ ______________________________________________________________________
  </p>
  <p align="center">
  <a href="https://sony.github.io/model_optimization#prerequisites"><img src="https://img.shields.io/badge/pytorch-2.2%20%7C%202.3%20%7C%202.4%20%7C%202.5-blue" /></a>
- <a href="https://sony.github.io/model_optimization#prerequisites"><img src="https://img.shields.io/badge/tensorflow-2.12%20%7C%202.13%20%7C%202.14%20%7C%202.15-blue" /></a>
+ <a href="https://sony.github.io/model_optimization#prerequisites"><img src="https://img.shields.io/badge/tensorflow-2.14%20%7C%202.15-blue" /></a>
  <a href="https://sony.github.io/model_optimization#prerequisites"><img src="https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue" /></a>
  <a href="https://github.com/sony/model_optimization/releases"><img src="https://img.shields.io/github/v/release/sony/model_optimization" /></a>
  <a href="https://github.com/sony/model_optimization/blob/main/LICENSE.md"><img src="https://img.shields.io/badge/license-Apache%202.0-blue" /></a>
@@ -171,7 +171,7 @@ Currently, MCT is being tested on various Python, Pytorch and TensorFlow version
  | Python 3.12 | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch22.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch22.yml) | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch23.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch23.yml) | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch24.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch24.yml) | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch25.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python312_pytorch25.yml) |
  
  | | TensorFlow 2.14 | TensorFlow 2.15 |
- |-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+ |-------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
  | Python 3.9 | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_keras214.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_keras214.yml) | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_keras215.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_keras215.yml) |
  | Python 3.10 | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_keras214.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_keras214.yml) | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_keras215.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_keras215.yml) |
  | Python 3.11 | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_keras214.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_keras214.yml) | [![Run Tests](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_keras215.yml/badge.svg)](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_keras215.yml) |
@@ -1,5 +1,5 @@
- mct_nightly-2.3.0.20250428.605.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
- model_compression_toolkit/__init__.py,sha256=KTddrZxT3r5G_WJ3NWOixbYFMVcLk032Ii0ssUccyic,1557
+ mct_nightly-2.3.0.20250430.538.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ model_compression_toolkit/__init__.py,sha256=_uN22zhCLe0oS_Ex9lxOL4jmaLZUIfp9zJ82XXgBrqQ,1557
  model_compression_toolkit/constants.py,sha256=iJ6vfTjC2oFIZWt8wvHoxEw5YJi3yl0Hd4q30_8q0Zc,3958
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
  model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
@@ -66,11 +66,11 @@ model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,s
  model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=7dKMi5S0zQZ16m8NWn1XIuoXsKuZUg64G4-uK8-j1PQ,5177
  model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=6pLUEEIqRTVIlCYQC4JIvY55KAvuBHEX8uTOQ-1Ac4Q,3859
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=r1t025_QHshyoop-PZvL7x6UuXaeplCCU3h4VNBhJHo,4309
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=onHgDwfw8CUbZFNU-RYit9eqA6FrzAtFA3akVZ2d7IM,4533
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=-hOMBucYn12ePyLd0b1KxniPOIRu4b53SwEzv0bWToI,4943
  model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=d5-3j2e_rdcQOT7c4s0p7640i3nSetjJ6MgMhhMM7dc,6152
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=658DBP0sY6DRqEbFcK1gX4EGQMeaBSFE5-7_Py6sioE,37718
- model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=4bkM8pYKvk18cxHbx973Dz6qWrNT0MRm44cuk__qVaI,27297
+ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=J8io_axti6gRoch9QR0FmKOP8JSHGeKqX95rf-nG6fI,37719
+ model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=R3UIO9lKf-lpEGfJOqgpQAXdP1IWMatWxXKYDkhWj_E,28096
  model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
  model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=S1ChgxtUjzXJufNWyRbKoNdyNC6fGUjPeComDMx8ZCo,9479
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
@@ -335,7 +335,7 @@ model_compression_toolkit/exporter/model_exporter/keras/mctq_keras_exporter.py,s
  model_compression_toolkit/exporter/model_exporter/pytorch/__init__.py,sha256=uZ2RigbY9O2PJ0Il8wPpS_s7frgg9WUGd_SHeKGyl1A,699
  model_compression_toolkit/exporter/model_exporter/pytorch/base_pytorch_exporter.py,sha256=UPVkEUQCMZ4Lld6CRnEOPEmlfe5vcQZG0Q3FwRBodD4,4021
  model_compression_toolkit/exporter/model_exporter/pytorch/export_serialization_format.py,sha256=bPevy6OBqng41PqytBR55e6cBEuyrUS0H8dWX4zgjQ4,967
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=G2X_lDx6u12U4ErEuEHCdNczh0qSGWObySw3upEys6Q,7506
+ model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=5DHg8ch8_DtSQ7M5Aw3LcZLSVFqFH556cKcosp3Ik-I,8720
  model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py,sha256=ksWV2A-Njo-wAxQ_Ye2sLIZXBWJ_WNyjT7-qFFwvV2o,2897
  model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py,sha256=8vYGKa58BkasvoHejYaPwubOJPcW0s-RY79_Kkw0Hy8,6236
  model_compression_toolkit/exporter/model_wrapper/__init__.py,sha256=7CF2zvpTrIEm8qnbuHnLZyTZkwBBxV24V8QA0oxGbh0,1187
@@ -528,7 +528,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
  model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
  model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
  model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
- mct_nightly-2.3.0.20250428.605.dist-info/METADATA,sha256=qHKhtkD9E5Npa0vcNQc376dwsvBE6iUM0aiTV1S76qg,25560
- mct_nightly-2.3.0.20250428.605.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
- mct_nightly-2.3.0.20250428.605.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
- mct_nightly-2.3.0.20250428.605.dist-info/RECORD,,
+ mct_nightly-2.3.0.20250430.538.dist-info/METADATA,sha256=8l9P7bLANjVXo-Z5_aR8x8VtcIWHhdYNNHQmoOS6cig,25101
+ mct_nightly-2.3.0.20250430.538.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+ mct_nightly-2.3.0.20250430.538.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+ mct_nightly-2.3.0.20250430.538.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
  from model_compression_toolkit import pruning
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
  
- __version__ = "2.3.0.20250428.000605"
+ __version__ = "2.3.0.20250430.000538"
@@ -27,6 +27,7 @@ class MixedPrecisionQuantizationConfig:
  Args:
  compute_distance_fn (Callable): Function to compute a distance between two tensors. If None, using pre-defined distance methods based on the layer type for each layer.
  distance_weighting_method (MpDistanceWeighting): MpDistanceWeighting enum value that provides a function to use when weighting the distances among different layers when computing the sensitivity metric.
+ custom_metric_fn (Callable): Function to compute a custom sensitivity metric. It receives the configured mixed-precision model (model_mp) as input and returns the metric as a float. If None, the default interest-point metric is used.
  num_of_images (int): Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.
  configuration_overwrite (List[int]): A list of integers that enables overwrite of mixed precision with a predefined one.
  num_interest_points_factor (float): A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.
@@ -39,6 +40,7 @@ class MixedPrecisionQuantizationConfig:
  
  compute_distance_fn: Optional[Callable] = None
  distance_weighting_method: MpDistanceWeighting = MpDistanceWeighting.AVG
+ custom_metric_fn: Optional[Callable] = None
  num_of_images: int = MP_DEFAULT_NUM_SAMPLES
  configuration_overwrite: Optional[List[int]] = None
  num_interest_points_factor: float = field(default=1.0, metadata={"description": "Should be between 0.0 and 1.0"})
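Taken together, the two hunks above add a `custom_metric_fn` knob to `mixed_precision_quantization_config.py`. A minimal usage sketch, assuming the field is reachable through the public `mct.core.MixedPrecisionQuantizationConfig` dataclass; the metric name and its body are illustrative, not part of this release:

```python
import numpy as np
import model_compression_toolkit as mct

def my_metric(model_mp) -> float:
    # Hypothetical metric: score the candidate mixed-precision model
    # (model_mp) however you like, as long as a float comes back.
    # A real implementation would run a validation batch here.
    return float(np.random.rand())  # placeholder score

mp_cfg = mct.core.MixedPrecisionQuantizationConfig(custom_metric_fn=my_metric)
core_cfg = mct.core.CoreConfig(mixed_precision_config=mp_cfg)
```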
@@ -169,6 +169,7 @@ class MixedPrecisionSearchManager:
  return self.sensitivity_evaluator.compute_metric(topo_cfg(cfg),
                                                   node_idx,
                                                   topo_cfg(baseline_cfg) if baseline_cfg else None)
+ 
  if self.using_virtual_graph:
      origin_max_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
          self.max_ru_config)
@@ -89,6 +89,9 @@ class SensitivityEvaluation:
  self.interest_points = get_mp_interest_points(graph,
                                                fw_impl.count_node_for_mixed_precision_interest_points,
                                                quant_config.num_interest_points_factor)
+ # If using a custom metric - keep only the model outputs (no interest points)
+ if self.quant_config.custom_metric_fn is not None:
+     self.interest_points = []
  
  # We use normalized MSE when not running hessian-based. For Hessian-based normalized MSE is not needed
  # because hessian weights already do normalization.
@@ -96,6 +99,9 @@ class SensitivityEvaluation:
  self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points, use_normalized_mse)
  
  self.output_points = get_output_nodes_for_metric(graph)
+ # If using a custom metric - use all model outputs
+ if self.quant_config.custom_metric_fn is not None:
+     self.output_points = [n.node for n in graph.get_outputs()]
  self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(self.output_points,
                                                                              use_normalized_mse)
  
@@ -160,7 +166,7 @@ class SensitivityEvaluation:
  """
  Compute the sensitivity metric of the MP model for a given configuration (the sensitivity
  is computed based on the similarity of the interest points' outputs between the MP model
- and the float model).
+ and the float model, or by a custom metric if one is provided).
  
  Args:
  mp_model_configuration: Bitwidth configuration to use to configure the MP model.
@@ -177,15 +183,21 @@ class SensitivityEvaluation:
  node_idx)
  
  # Compute the distance metric
- ipts_distances, out_pts_distances = self._compute_distance()
+ if self.quant_config.custom_metric_fn is None:
+     ipts_distances, out_pts_distances = self._compute_distance()
+     sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
+                                                            self.quant_config.distance_weighting_method)
+ else:
+     sensitivity_metric = self.quant_config.custom_metric_fn(self.model_mp)
+     if not isinstance(sensitivity_metric, (float, np.floating)):
+         raise TypeError(f'The custom_metric_fn is expected to return float or numpy float, got {type(sensitivity_metric).__name__}')
  
  # Configure MP model back to the same configuration as the baseline model if baseline provided
  if baseline_mp_configuration is not None:
      self._configure_bitwidths_model(baseline_mp_configuration,
                                      node_idx)
  
- return self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
-                                          self.quant_config.distance_weighting_method)
+ return sensitivity_metric
  
  def _init_baseline_tensors_list(self):
  """
@@ -24,11 +24,11 @@ from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
  from model_compression_toolkit.exporter.model_exporter.pytorch.base_pytorch_exporter import BasePyTorchExporter
  from mct_quantizers import pytorch_quantizers
  
- 
  if FOUND_ONNX:
      import onnx
      from mct_quantizers.pytorch.metadata import add_onnx_metadata
  
+ 
  class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
  """
  Exporter for fakely-quant PyTorch models.
@@ -63,7 +63,7 @@ if FOUND_ONNX:
  self._use_onnx_custom_quantizer_ops = use_onnx_custom_quantizer_ops
  self._onnx_opset_version = onnx_opset_version
  
- def export(self) -> None:
+ def export(self, output_names=None) -> None:
  """
  Convert an exportable (fully-quantized) PyTorch model to a fakely-quant model
  (namely, weights that are in fake-quant format) and fake-quant layers for the activations.
@@ -95,6 +95,28 @@ if FOUND_ONNX:
  Logger.info(f"Exporting fake-quant onnx model: {self.save_model_path}")
  
  model_input = to_torch_tensor(next(self.repr_dataset()))
+ model_output = self.model(*model_input) if isinstance(model_input, (list, tuple)) else self.model(
+     model_input)
+ 
+ if output_names is None:
+     # Determine number of outputs and prepare output_names and dynamic_axes
+     if isinstance(model_output, (list, tuple)):
+         output_names = [f"output_{i}" for i in range(len(model_output))]
+         dynamic_axes = {'input': {0: 'batch_size'}}
+         dynamic_axes.update({name: {0: 'batch_size'} for name in output_names})
+     else:
+         output_names = ['output']
+         dynamic_axes = {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
+ else:
+     if isinstance(model_output, (list, tuple)):
+         num_of_outputs = len(model_output)
+     else:
+         num_of_outputs = 1
+     assert len(output_names) == num_of_outputs, (f"Mismatch between number of requested output names "
+                                                  f"({output_names}) and model output count "
+                                                  f"({num_of_outputs})")
+     dynamic_axes = {'input': {0: 'batch_size'}}
+     dynamic_axes.update({name: {0: 'batch_size'} for name in output_names})
  
  if hasattr(self.model, 'metadata'):
      onnx_bytes = BytesIO()
@@ -104,9 +126,8 @@ if FOUND_ONNX:
  opset_version=self._onnx_opset_version,
  verbose=False,
  input_names=['input'],
- output_names=['output'],
- dynamic_axes={'input': {0: 'batch_size'},
-               'output': {0: 'batch_size'}})
+ output_names=output_names,
+ dynamic_axes=dynamic_axes)
  onnx_model = onnx.load_from_string(onnx_bytes.getvalue())
  onnx_model = add_onnx_metadata(onnx_model, self.model.metadata)
  onnx.save_model(onnx_model, self.save_model_path)
@@ -117,9 +138,8 @@ if FOUND_ONNX:
  opset_version=self._onnx_opset_version,
  verbose=False,
  input_names=['input'],
- output_names=['output'],
- dynamic_axes={'input': {0: 'batch_size'},
-               'output': {0: 'batch_size'}})
+ output_names=output_names,
+ dynamic_axes=dynamic_axes)
  
  for layer in self.model.children():
      # Set disable for reuse for weight quantizers if quantizer was reused
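The exporter changes amount to deriving `output_names` and per-output `dynamic_axes` from a trial forward pass instead of hardcoding a single `'output'`. A standalone sketch of the equivalent `torch.onnx.export` call for a multi-output model (a toy module, not MCT code):

```python
import torch

class TwoHead(torch.nn.Module):
    def forward(self, x):
        return x.relu(), x.sigmoid()   # two outputs, like a multi-head model

model = TwoHead().eval()
dummy = torch.randn(1, 8)

# Mirrors the new exporter logic: one name per output tensor, and a
# dynamic batch dimension registered for the input and every output.
output_names = [f"output_{i}" for i in range(2)]
dynamic_axes = {'input': {0: 'batch_size'}}
dynamic_axes.update({name: {0: 'batch_size'} for name in output_names})

torch.onnx.export(model, dummy, "two_head.onnx",
                  input_names=['input'],
                  output_names=output_names,
                  dynamic_axes=dynamic_axes)
```

Note that `pytorch_export_facade.py` appears unchanged in this release's RECORD, so the new `output_names` parameter lives on the exporter's own `export()` method rather than the public export facade.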