mct-nightly 2.4.0.20250608.655__py3-none-any.whl → 2.4.0.20250609.615__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mct-nightly
- Version: 2.4.0.20250608.655
+ Version: 2.4.0.20250609.615
  Summary: A Model Compression Toolkit for neural networks
  Author-email: ssi-dnn-dev@sony.com
  Classifier: Programming Language :: Python :: 3
@@ -1,5 +1,5 @@
- mct_nightly-2.4.0.20250608.655.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
- model_compression_toolkit/__init__.py,sha256=9nm4cV4Z-_J1NtF0o6cjMqNu_GjdOWHTgWglMLmuB9Y,1557
+ mct_nightly-2.4.0.20250609.615.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ model_compression_toolkit/__init__.py,sha256=iZK1iC0PgvlQ8g_69klbV_2fHfkRz-cbEc-fJLlJHQY,1557
  model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
  model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
@@ -16,7 +16,7 @@ model_compression_toolkit/core/common/framework_implementation.py,sha256=JQI_eoZ
  model_compression_toolkit/core/common/framework_info.py,sha256=5tderHT-7Cd21QrRFIJj3hH_gAcnlivOzwZ5m1ldJOs,6526
  model_compression_toolkit/core/common/memory_computation.py,sha256=ixoSpV5ZYZGyzhre3kQcvR2sNA8KBsPZ3lgbkDnw9Cs,1205
  model_compression_toolkit/core/common/model_builder_mode.py,sha256=jll9-59OPaE3ug7Y9-lLyV99_FoNHxkGZMgcm0Vkpss,1324
- model_compression_toolkit/core/common/model_collector.py,sha256=ssYLdo2FPEzXoFKQonUA3ZbrIaDp-UmLhv0mWMCHH8U,13225
+ model_compression_toolkit/core/common/model_collector.py,sha256=AbnJVjDlpmQZyj3Ba0XkK76zWh5dqcDFJdKXFTq25uw,13420
  model_compression_toolkit/core/common/model_validation.py,sha256=LaG8wd6aZl0OJgieE3SeiVDEPxtk8IHq9-3wSnmWhY4,1214
  model_compression_toolkit/core/common/node_prior_info.py,sha256=WXX_PrGVG9M9I_REG5ZzFBohwmV4yf356sZnrja_FLo,2832
  model_compression_toolkit/core/common/similarity_analyzer.py,sha256=S3f6WgHyw62dGcxpX51FGKyfebe2zv9ABKbjtGyKRvY,9215
@@ -31,10 +31,10 @@ model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.p
  model_compression_toolkit/core/common/collectors/statistics_collector.py,sha256=psijsQZefwjMDH8SU5E18n65HiGtQilPhKr1hhzZX-I,8268
  model_compression_toolkit/core/common/collectors/weighted_histogram_collector.py,sha256=zp3dE7YTqWmkD5QWdRhsl9zD8W6Lr96G1Wjw1g2D3T0,4894
  model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
- model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=uDxF0awrjn3SbcpXBpoQ4OGcKO6Z7HBk8ierZPCGbGo,21970
+ model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=Z-O03-DlM4XyllVg5FaQlYeIgk5UqoC8dSA6IlRODNI,22693
  model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=yxxxuwrmQ4wLW-PlTu0MEW59LmNJEh1OWy9Li15YH-8,7520
  model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
- model_compression_toolkit/core/common/graph/base_graph.py,sha256=YA0c8ucaaZu9eRO-xruLqDT3QFOpxq24ViG6ILS2jqA,41403
+ model_compression_toolkit/core/common/graph/base_graph.py,sha256=NmGvxGg-UHsNEjz_mtwLhS0HMEM2-pbQPPax1QUw4o0,41378
  model_compression_toolkit/core/common/graph/base_node.py,sha256=8GEqZ8VMtVvJZuiSUVdokCq6NkFosOssetcod21DwDM,33604
  model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
  model_compression_toolkit/core/common/graph/functional_node.py,sha256=GH5wStmw8SoAj5IdT_-ItN1Meo_P5NUTt_5bgJC4fak,3935
@@ -72,7 +72,7 @@ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_man
  model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=MY8df-c_kITEr_7hOctaxhdiq29hSTA0La9Qo0oTJJY,9678
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=PKkhc5q8pEPnNLXwo3U56EOCfYnPXIvPs0LlCGZOoKU,4426
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=8f6KDTKD8SzVXDl9jmYJ-p19cQB0Nr_UTdCPuhELTdg,40329
+ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=4Hl4JmrUAJjSNeT1efMTrM4UzHPr8RQeya0OY-6adWY,40304
  model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=ZY5yFIDzbaqIk0UzakDBObfsVevn4fydqAfAm4RCikY,4058
  model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
  model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=6Z6nQL9UH7B8dbcUR0cuCTEYFOKZAlvOb-SCk_cAZFA,6670
@@ -336,7 +336,7 @@ model_compression_toolkit/exporter/model_exporter/keras/mctq_keras_exporter.py,s
  model_compression_toolkit/exporter/model_exporter/pytorch/__init__.py,sha256=uZ2RigbY9O2PJ0Il8wPpS_s7frgg9WUGd_SHeKGyl1A,699
  model_compression_toolkit/exporter/model_exporter/pytorch/base_pytorch_exporter.py,sha256=9adOGG1nyviNzuL-1aJXyL0c_VQllSZWiG2gR-puywo,6420
  model_compression_toolkit/exporter/model_exporter/pytorch/export_serialization_format.py,sha256=bPevy6OBqng41PqytBR55e6cBEuyrUS0H8dWX4zgjQ4,967
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=5S3lyNVc3F62mvS8Q-RTmgQXWI6GWZ5YRvjG4qFy2MM,9520
+ model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=1ix8j7rxc1giPjf2PZKwaaCb5pKo0obUvPmRtklmugY,10056
  model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py,sha256=ksWV2A-Njo-wAxQ_Ye2sLIZXBWJ_WNyjT7-qFFwvV2o,2897
  model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py,sha256=7xuUrHPMiifn23sWfeiqR9wkYhm8EweDRL_vF-JSxMY,6642
  model_compression_toolkit/exporter/model_wrapper/__init__.py,sha256=7CF2zvpTrIEm8qnbuHnLZyTZkwBBxV24V8QA0oxGbh0,1187
@@ -529,7 +529,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
  model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
  model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
  model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
- mct_nightly-2.4.0.20250608.655.dist-info/METADATA,sha256=8vPT5m8b4jAczYopmCw7AX6Zkw60YCFVYC26axUCmqA,25087
- mct_nightly-2.4.0.20250608.655.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- mct_nightly-2.4.0.20250608.655.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
- mct_nightly-2.4.0.20250608.655.dist-info/RECORD,,
+ mct_nightly-2.4.0.20250609.615.dist-info/METADATA,sha256=QQwkgGOWQt_9w0Xc9uRQrcvJg370QpweeRXfkzForV0,25087
+ mct_nightly-2.4.0.20250609.615.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mct_nightly-2.4.0.20250609.615.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+ mct_nightly-2.4.0.20250609.615.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
  from model_compression_toolkit import pruning
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model

- __version__ = "2.4.0.20250608.000655"
+ __version__ = "2.4.0.20250609.000615"
@@ -39,13 +39,13 @@ class FusingInfo:
  - 'fusing_patterns': The patterns to generate the fused operators from.
  - 'manual_fused_ops': List of sequences of node names to handle as fused ops (even if they are not part of the fusing patterns).
  - `fusing_data`: A dictionary mapping fused operation IDs to lists of nodes that belong to that operation.
- - `node_to_fused_node_map`: A dictionary mapping each node name to the ID of the fused operation it belongs to.
+ - `node_name_to_fused_op_id`: A dictionary mapping each node name to the ID of the fused operation it belongs to.

  """
  fusing_patterns: List[list[any]] = None
  manual_fused_ops: List[List[str]] = None
  fusing_data: Dict[str, Tuple['BaseNode']] = field(default_factory=dict)
- node_to_fused_node_map: Dict[str, str] = field(init=False, default_factory=dict)
+ node_name_to_fused_op_id: Dict[str, str] = field(init=False, default_factory=dict)
  fused_op_id_to_quant_config: Dict[str, OpQuantizationConfig] = field(default_factory=dict)

  def __post_init__(self):
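For orientation, here is a minimal sketch of the relationship the rename makes explicit, with hypothetical op and node names (plain strings stand in for `BaseNode` objects): `fusing_data` maps each fused-op ID to its member nodes, and `node_name_to_fused_op_id` is the derived inverse lookup, which is exactly what the mapping-init code in the next hunk rebuilds.

```python
# Hypothetical fused ops; strings stand in for BaseNode objects.
fusing_data = {
    "FusedNode_conv1_relu1": ("conv1", "relu1"),
    "FusedNode_conv2_add1": ("conv2", "add1"),
}

# The renamed field is the per-node-name inverse lookup of fusing_data.
node_name_to_fused_op_id = {
    node: op_id
    for op_id, nodes in fusing_data.items()
    for node in nodes
}

assert node_name_to_fused_op_id["relu1"] == "FusedNode_conv1_relu1"
```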
@@ -64,10 +64,10 @@ class FusingInfo:
      """
      Init the node-to-fused-node mapping based on the initial fusing data.
      """
-     self.node_to_fused_node_map.clear()
+     self.node_name_to_fused_op_id.clear()
      for op_id, nodes in self.fusing_data.items():
          for node in nodes:
-             self.node_to_fused_node_map[node.name] = op_id
+             self.node_name_to_fused_op_id[node.name] = op_id

  def get_manual_nodes_to_fuse(self) -> List[List[str]]:
      """
@@ -115,7 +115,7 @@ class FusingInfo:
      self.fusing_data[op_id] = nodes
      # Update the mapping for these nodes
      for node in nodes:
-         self.node_to_fused_node_map[node.name] = op_id
+         self.node_name_to_fused_op_id[node.name] = op_id

      # Update the quantization config mapping for this operation
      if self.fusing_patterns is not None:
@@ -152,7 +152,7 @@ class FusingInfo:
      self._manual_fused_ops.remove(node_names)

      for node in nodes:
-         self.node_to_fused_node_map.pop(node.name, None)
+         self.node_name_to_fused_op_id.pop(node.name, None)
      del self.fusing_data[op_id]
      self.fused_op_id_to_quant_config.pop(op_id, None)

@@ -166,7 +166,7 @@ class FusingInfo:
      Returns:
          The name of the fused node containing this node, or None if not fused.
      """
-     return self.node_to_fused_node_map.get(node_name)
+     return self.node_name_to_fused_op_id.get(node_name)

  def get_node_to_fused_node_map(self) -> Dict[str, str]:
      """
@@ -175,7 +175,7 @@ class FusingInfo:
      Returns:
          A dictionary mapping each original node name to its fused node name.
      """
-     return self.node_to_fused_node_map.copy()
+     return self.node_name_to_fused_op_id.copy()

  def get_fusing_quantization_config_map(self) -> Dict[str, OpQuantizationConfig]:
      """
@@ -198,10 +198,12 @@ class FusingInfo:
      """
      return self.fusing_data.get(op_id)

- def get_nodes_to_disable_activation_quantization(self) -> List['BaseNode']:
+ def get_inner_fln_nodes(self) -> List['BaseNode']:
      """
-     Returns a list of the nodes that their activation quantization is disabled due to fusing.
+     Returns a list of the nodes that are part of an FLN but are not its last node.
      """
+     # TODO: the order of the nodes is not guaranteed when returned as a dict from
+     # get_all_fused_operations - then, removing the last one can cause issues.
      return [node for nodes in self.get_all_fused_operations().values() for node in nodes[:-1]]

  def get_fused_op_quantization_config(self, op_id: str) -> OpQuantizationConfig:
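A small sketch of what the renamed `get_inner_fln_nodes` computes, under the assumption that each fused op lists its nodes in execution order (hypothetical data, strings standing in for `BaseNode` objects): every node except the last is "inner" to the FLN.

```python
# Hypothetical fused ops, nodes listed in execution order.
fused_ops = {
    "fln_0": ("conv", "bn", "relu"),  # "relu" closes the fused sequence
    "fln_1": ("linear", "swish"),
}

# Same comprehension as get_inner_fln_nodes: drop the last node of each op.
inner_fln_nodes = [n for nodes in fused_ops.values() for n in nodes[:-1]]
assert inner_fln_nodes == ["conv", "bn", "linear"]
```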
@@ -228,6 +230,22 @@ class FusingInfo:
      """
      return any(node in nodes for nodes in self.fusing_data.values())

+ def is_quantized_node_in_fln(self, node: 'BaseNode') -> bool:
+     """
+     Check whether a node is inside an FLN and should be quantized.
+
+     Args:
+         node (BaseNode): The node to check.
+
+     Returns:
+         bool: True if the node is in any fused operation and should be quantized.
+     """
+     if self.is_node_in_fused_op(node):
+         node_q_cfg = self.fused_op_id_to_quant_config[self.node_name_to_fused_op_id[node.name]]
+         return node_q_cfg is not None and node_q_cfg.enable_activation_quantization
+
+     return False
+
  def get_all_fused_operations(self) -> Dict[str, Tuple['BaseNode']]:
      """
      Retrieve fused information.
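The logic of the new `is_quantized_node_in_fln` can be exercised in isolation. A self-contained sketch with a hypothetical stand-in for `OpQuantizationConfig`: a fused node counts as quantized only when its op has a config and that config enables activation quantization.

```python
from dataclasses import dataclass

@dataclass
class OpQuantCfg:  # hypothetical stand-in for OpQuantizationConfig
    enable_activation_quantization: bool

fused_op_id_to_quant_config = {"fln_0": OpQuantCfg(True), "fln_1": None}
node_name_to_fused_op_id = {"bn": "fln_0", "swish": "fln_1"}

def is_quantized_node_in_fln(node_name: str) -> bool:
    op_id = node_name_to_fused_op_id.get(node_name)
    if op_id is None:  # node is not part of any fused op
        return False
    cfg = fused_op_id_to_quant_config[op_id]
    return cfg is not None and cfg.enable_activation_quantization

assert is_quantized_node_in_fln("bn") is True      # config enables quantization
assert is_quantized_node_in_fln("swish") is False  # fused op has no config
assert is_quantized_node_in_fln("conv") is False   # not part of any fused op
```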
@@ -340,7 +358,7 @@ class FusingInfo:
          for op_id, nodes in self.fusing_data.items()
      )
      mapping_repr = ", ".join(
-         f"{node} -> {op_id}" for node, op_id in self.node_to_fused_node_map.items()
+         f"{node} -> {op_id}" for node, op_id in self.node_name_to_fused_op_id.items()
      )
      return (
          f"FusingInfo(\n"
@@ -908,7 +908,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
      Disable activation quantization for all nodes in fused operations,
      except for the last node in each fused group.
      """
-     nodes_to_disable = self.fusing_info.get_nodes_to_disable_activation_quantization()
+     nodes_to_disable = self.fusing_info.get_inner_fln_nodes()
      for node in nodes_to_disable:
          for qc in node.candidates_quantization_cfg:
              qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
@@ -677,7 +677,7 @@ class ResourceUtilizationCalculator:
      elif target_criterion == TargetInclusionCriterion.AnyQuantizedNonFused:
          nodes = [n for n in nodes if n.is_activation_quantization_enabled() or n.is_quantization_preserving()]
          # remove fused nodes (due to SNC, where the non-linear is quantized, even though it should not be quantized)
-         nodes = [n for n in nodes if n not in self.graph.fusing_info.get_nodes_to_disable_activation_quantization()]
+         nodes = [n for n in nodes if n not in self.graph.fusing_info.get_inner_fln_nodes()]
      elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
          nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
      elif target_criterion != TargetInclusionCriterion.Any:  # pragma: no cover
@@ -30,7 +30,8 @@ from model_compression_toolkit.core.common.collectors.statistics_collector impor


  def create_stats_collector_for_node(node: common.BaseNode,
-                                     fw_info: FrameworkInfo) -> BaseStatsCollector:
+                                     fw_info: FrameworkInfo,
+                                     quant_node_in_fln: bool) -> BaseStatsCollector:
      """
      Gets a node and creates and returns a statistics collector for the node,
      according to whether its statistics should be collected and the prior information we
@@ -44,7 +45,7 @@ def create_stats_collector_for_node(node: common.BaseNode,
          Statistics collector for the node.
      """

-     if node.is_activation_quantization_enabled() or node.is_fln_quantization():
+     if node.is_activation_quantization_enabled() or quant_node_in_fln:
          min_output = getattr(node.prior_info, 'min_output', None)
          max_output = getattr(node.prior_info, 'max_output', None)
          stats_collector = common.StatsCollector(out_channel_axis=fw_info.out_channel_axis_mapping.get(node.type),
@@ -160,7 +161,8 @@ class ModelCollector:

      # Assign statistics collectors to nodes
      for n in graph.get_topo_sorted_nodes():
-         sc = create_stats_collector_for_node(n, fw_info=fw_info)  # Get statistics collector for the node
+         quant_node_in_fln = n.is_fln_quantization() and graph.fusing_info.is_quantized_node_in_fln(n)
+         sc = create_stats_collector_for_node(n, fw_info=fw_info, quant_node_in_fln=quant_node_in_fln)  # Get statistics collector for the node
          # If we use bias correction, and the node has kernel weights to quantize, we need to make sure
          # its previous nodes' tensors are consistent with this node.
          kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
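Taken together, the two model_collector.py hunks change when a statistics collector is created. A minimal, self-contained sketch of the combined condition, using hypothetical stub objects in place of the real node and fusing-info types:

```python
from types import SimpleNamespace

def needs_stats_collector(node, fusing_info) -> bool:
    # Collect statistics when the node's own activation quantization is on,
    # or when it is FLN-marked and its fused op's config enables quantization.
    quant_node_in_fln = (node.is_fln_quantization()
                         and fusing_info.is_quantized_node_in_fln(node))
    return node.is_activation_quantization_enabled() or quant_node_in_fln

node = SimpleNamespace(is_fln_quantization=lambda: True,
                       is_activation_quantization_enabled=lambda: False)
fusing_info = SimpleNamespace(is_quantized_node_in_fln=lambda n: True)
assert needs_stats_collector(node, fusing_info) is True
```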
@@ -78,9 +78,18 @@ if FOUND_ONNX:
      act_holder_list = [n for n, m in self.model.named_modules()
                         if isinstance(m, PytorchActivationQuantizationHolder) and
                         m.activation_holder_quantizer.num_bits > 8]
-     for act_holder in act_holder_list:  # pragma: no cover
-         delattr(self.model, act_holder)
-         setattr(self.model, act_holder, torch.nn.Identity())
+     for act_holder in act_holder_list:  # pragma: no cover
+         obj = self.model
+         attrs = act_holder.split(".")
+         for a in attrs[:-1]:
+             obj = getattr(obj, a)
+         if hasattr(obj, attrs[-1]):
+             delattr(obj, attrs[-1])
+             setattr(obj, attrs[-1], torch.nn.Identity())
+         else:
+             Logger.info(f"During removal of activation quantization of a quantizer (with bits > 8) in ONNX FQ "
+                         f"export, deletion of activation holder '{act_holder}' failed: could not locate one or "
+                         f"more intermediate attributes in the path.")

      for layer in self.model.children():
          self.is_layer_exportable_fn(layer)
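The new traversal generalizes the old top-level delattr/setattr, which only handled holders registered directly on the model, to holders nested under submodules (named_modules() returns dotted paths such as "block.act"). A self-contained sketch of the same dotted-path replacement on a hypothetical toy model, using only standard torch APIs:

```python
import torch

class Block(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.act = torch.nn.ReLU()  # stand-in for a quantization holder

class Net(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.block = Block()

def replace_by_path(root: torch.nn.Module, path: str, new: torch.nn.Module) -> None:
    # Walk down to the direct parent of the leaf attribute, then swap the
    # leaf, mirroring the loop in the exporter hunk above.
    *parents, leaf = path.split(".")
    obj = root
    for name in parents:
        obj = getattr(obj, name)
    if hasattr(obj, leaf):
        delattr(obj, leaf)
        setattr(obj, leaf, new)

model = Net()
replace_by_path(model, "block.act", torch.nn.Identity())
assert isinstance(model.block.act, torch.nn.Identity)
```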