mct-nightly 2.4.0.20250626.617__py3-none-any.whl → 2.4.0.20250628.555__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mct-nightly
- Version: 2.4.0.20250626.617
+ Version: 2.4.0.20250628.555
  Summary: A Model Compression Toolkit for neural networks
  Author-email: ssi-dnn-dev@sony.com
  Classifier: Programming Language :: Python :: 3
@@ -1,5 +1,5 @@
- mct_nightly-2.4.0.20250626.617.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
- model_compression_toolkit/__init__.py,sha256=7rL9mYdsLggQYsvr-mE5212sKK0euktVF9gCAbTK6a4,1557
+ mct_nightly-2.4.0.20250628.555.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ model_compression_toolkit/__init__.py,sha256=i97j--Rm-5M5jfoHTDDtJSsIJ2swWaHPeoSNq5P8zY4,1557
  model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
  model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
@@ -7,7 +7,7 @@ model_compression_toolkit/metadata.py,sha256=x_Bk4VpzILdsFax6--CZ3X18qUTP28sbF_A
  model_compression_toolkit/verify_packages.py,sha256=l0neIRr8q_QwxmuiTI4vyCMDISDedK0EihjEQUe66tE,1319
  model_compression_toolkit/core/__init__.py,sha256=HNverPpoqEyFKTa7iEdOqqY2P0Gq-7GMejNOi6ZPcQs,2042
  model_compression_toolkit/core/analyzer.py,sha256=5P03LbkFy-mu31TMAiQoIKcsA1-DNz7cTzkGvRaXtbw,3505
- model_compression_toolkit/core/graph_prep_runner.py,sha256=d-NrEUQETiY23-n9rLcf73PEnfaW-DbF5FSgWgRRE2w,10515
+ model_compression_toolkit/core/graph_prep_runner.py,sha256=8K5JWOiCJxd54nlyYQS7g5UJl30sE7VWuscxForJ3ZA,10526
  model_compression_toolkit/core/quantization_prep_runner.py,sha256=tz91E1BaNc_K0lvVZGB8oS6ya5N4Z5TJLG4pSM3hx30,6229
  model_compression_toolkit/core/runner.py,sha256=pmRJeIqB0dKnyNsNSoaBgAkHv_RhQZylknWRFmnoStM,12423
  model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
@@ -34,8 +34,8 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
  model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=Z-O03-DlM4XyllVg5FaQlYeIgk5UqoC8dSA6IlRODNI,22693
  model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=yxxxuwrmQ4wLW-PlTu0MEW59LmNJEh1OWy9Li15YH-8,7520
  model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
- model_compression_toolkit/core/common/graph/base_graph.py,sha256=mlQ-y3sX7C6ebJk43OdQKgQuyfVCpG6zuCQiRGvm3Co,39738
- model_compression_toolkit/core/common/graph/base_node.py,sha256=n_BEQWiKxhpB_vz-ILlKDsllt7E_F4lXhdhnDZSSOYg,35124
+ model_compression_toolkit/core/common/graph/base_graph.py,sha256=_bv_XeENRoJJLvro6raa-Cn57awgnl70TvMM4ZNjkyA,41449
+ model_compression_toolkit/core/common/graph/base_node.py,sha256=wmhNY6bSM8920HAhl7hRZsMBKIi5h7g6DrytbEbuz18,35536
  model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
  model_compression_toolkit/core/common/graph/functional_node.py,sha256=wtX6CYzAwgzkt_gp3wByfYydIUZV4vjIHt9TU3Ps9nw,4731
  model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
@@ -106,8 +106,8 @@ model_compression_toolkit/core/common/quantization/bit_width_config.py,sha256=03
  model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py,sha256=X9W_az8RQtOi4aFDLNzHxES_r6qs0kixB8OQ7seOVe8,4992
  model_compression_toolkit/core/common/quantization/core_config.py,sha256=yxCzWqldcHoe8GGxrH0tp99bhrc5jDT7SgZftnMUUBE,2374
  model_compression_toolkit/core/common/quantization/debug_config.py,sha256=uH45Uq3Tp9FIyMynex_WY2_y-Kv8LuPw2XXZydnpW5A,1649
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=iYr3hBkAa4rQ5gCDhKQhrW8xK9luEASrngwdZiLAY_Q,7064
- model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=VhM8Wp0_cpG7ukU9trkoAaxhX-Jb4ogGtr1ODfxzNYA,30225
+ model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=AFabyE57oL-XmI8jUhMA5p6ZVZGJW8dkI1BClN8Gu3w,7670
+ model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=Zy2Cz3vhRcjV_J7fqJf79J5dyBQHQy9AMGxD9RFuV9I,30250
  model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=UkSVW7d1OF_Px9gAjsqqK65aYhIBFWaBO-_IH6_AFfg,4403
  model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=HfBkSiRTOf9mNF-TNQHTCCs3xSg66F20no0O6vl5v1Y,2154
  model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=7eG7dl1TcbdnHwgmvyjarxLs0o6Lw_9VAjXAm4rsiBk,3791
@@ -529,7 +529,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
  model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=Y0oBl8qPFsdNrK49XczwmVacInJcOPHslVnFBs-iTCc,3742
  model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
  model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=n0HvWBzkBkUJZlS3WeynhpsRTps2qQkjlq7luliBHNU,9627
- mct_nightly-2.4.0.20250626.617.dist-info/METADATA,sha256=lbqZdnDqrpeyBbBGOvnhU3sN00dShfqBziSBw61N9Yo,25087
- mct_nightly-2.4.0.20250626.617.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- mct_nightly-2.4.0.20250626.617.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
- mct_nightly-2.4.0.20250626.617.dist-info/RECORD,,
+ mct_nightly-2.4.0.20250628.555.dist-info/METADATA,sha256=FY4d6D9wbWtWWvn7sXzMdVLz_t4R3iVfbrYz_YQBPPA,25087
+ mct_nightly-2.4.0.20250628.555.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mct_nightly-2.4.0.20250628.555.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+ mct_nightly-2.4.0.20250628.555.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
  from model_compression_toolkit import pruning
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
- __version__ = "2.4.0.20250626.000617"
+ __version__ = "2.4.0.20250628.000555"
@@ -32,13 +32,13 @@ from model_compression_toolkit.core.common.collectors.statistics_collector impor
  from model_compression_toolkit.core.common.collectors.statistics_collector import scale_statistics, shift_statistics
  from model_compression_toolkit.core.common.pruning.pruning_section import PruningSection
  from model_compression_toolkit.core.common.user_info import UserInformation
- from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
+ from model_compression_toolkit.core.common.quantization.node_quantization_config import \
+     NodeActivationQuantizationConfig, ActivationQuantizationMode
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import LayerFilterParams
  from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
      FrameworkQuantizationCapabilities
 
-
  def validate_graph_after_change(method: Callable) -> Callable:
      """
      Decorator for graph-mutating methods. After the decorated method executes,
@@ -876,15 +876,32 @@ class Graph(nx.MultiDiGraph, GraphSearches):
 
          return intermediate_nodes, next_node
 
-     def disable_fused_nodes_activation_quantization(self):
+     def override_fused_node_activation_quantization_candidates(self):
          """
-         Disable activation quantization for all nodes in fused operations,
+         Override the activation quantization candidates for all nodes in fused operations,
          except for the last node in each fused group.
-         """
-         nodes_to_disable = self.fusing_info.get_inner_fln_nodes()
-         for node in nodes_to_disable:
-             for qc in node.candidates_quantization_cfg:
-                 qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
+         Updates each candidate's quantization config with the op_quantization_cfg from FusingInfo.
+         """
+         from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
+             CandidateNodeQuantizationConfig
+
+         nodes_in_fln = self.fusing_info.get_inner_fln_nodes()
+         for node in nodes_in_fln:
+             fused_node_op_id = self.fusing_info.get_fused_op_id_for_node(node.name)
+             fused_op_quantization_cfg = self.fusing_info.get_fused_op_quantization_config(fused_node_op_id)
+             org_candidate = node.candidates_quantization_cfg[0]
+             if fused_op_quantization_cfg is not None and fused_op_quantization_cfg.enable_activation_quantization:
+                 # Set ActivationQuantizationMode to FLN_QUANT and rebuild the activation config from the fused op's config.
+                 activation_quantization_cfg = NodeActivationQuantizationConfig(qc=org_candidate,
+                                                                                op_cfg=fused_op_quantization_cfg,
+                                                                                activation_quantization_fn=org_candidate.activation_quantization_cfg.activation_quantization_fn,
+                                                                                activation_quantization_params_fn=org_candidate.activation_quantization_cfg.activation_quantization_params_fn)
+                 activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
+                 for qc in node.candidates_quantization_cfg:
+                     qc.activation_quantization_cfg = activation_quantization_cfg
+             else:
+                 # Set ActivationQuantizationMode to FLN_NO_QUANT
+                 for qc in node.candidates_quantization_cfg:
+                     qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_NO_QUANT
 
      def validate(self):
          """
@@ -908,4 +925,4 @@ class Graph(nx.MultiDiGraph, GraphSearches):
          """
          Wrap networkx functions (that modify the graph) with our validate decorator.
          """
-         return super().remove_edge(*args, **kwargs)
+         return super().remove_edge(*args, **kwargs)
@@ -216,19 +216,31 @@ class BaseNode:
          Returns: Whether node activation quantization is enabled or not.
          """
          return self._is_single_quant_mode(ActivationQuantizationMode.QUANT)
 
-     def is_fln_quantization(self) -> bool:
+     def is_fln_no_quantization(self) -> bool:
          """
-         Returns: Whether the node's activation quantization is FLN
+         Returns: Whether the node's activation quantization mode is FLN_NO_QUANT.
          """
-         return self._is_single_quant_mode(ActivationQuantizationMode.FLN_QUANT)
+         return self._is_single_quant_mode(ActivationQuantizationMode.FLN_NO_QUANT)
 
      def is_quantization_preserving(self) -> bool:
          """
          Returns: Whether node activation quantization information is preserved from its inputs.
          """
          return self._is_single_quant_mode(ActivationQuantizationMode.PRESERVE_QUANT)
 
+     def is_no_quantization(self) -> bool:
+         """
+         Returns: Whether the node's activation quantization mode is NO_QUANT.
+         """
+         return self._is_single_quant_mode(ActivationQuantizationMode.NO_QUANT)
+
+     def is_fln_quantization(self) -> bool:
+         """
+         Returns: Whether the node's activation quantization mode is FLN_QUANT.
+         """
+         return self._is_single_quant_mode(ActivationQuantizationMode.FLN_QUANT)
+
      def is_weights_quantization_enabled(self, attr_name: str) -> bool:
          """
          Checks whether a node's weights attribute quantization is enabled.
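base_node.py now distinguishes NO_QUANT from the new FLN_NO_QUANT through dedicated predicates, which is what lets filter_nodes_candidates (below) switch from `not node.is_activation_quantization_enabled()` to `node.is_no_quantization()`. A runnable sketch with a stand-in class; the semantics of `_is_single_quant_mode` are assumed from its name and usage, not taken from the diff:

```python
# Stand-in sketch of the expanded predicate family. NodeSketch is hypothetical;
# only the method names and the enum members mirror the diff.
from enum import Enum, auto
from typing import List

class ActivationQuantizationMode(Enum):
    QUANT = auto()
    FLN_QUANT = auto()
    PRESERVE_QUANT = auto()
    NO_QUANT = auto()
    FLN_NO_QUANT = auto()

class NodeSketch:
    def __init__(self, candidate_modes: List[ActivationQuantizationMode]):
        self.candidate_modes = candidate_modes

    def _is_single_quant_mode(self, mode: ActivationQuantizationMode) -> bool:
        # Assumed semantics: every candidate shares exactly this one mode.
        return set(self.candidate_modes) == {mode}

    def is_activation_quantization_enabled(self) -> bool:
        return self._is_single_quant_mode(ActivationQuantizationMode.QUANT)

    def is_no_quantization(self) -> bool:       # new in this diff
        return self._is_single_quant_mode(ActivationQuantizationMode.NO_QUANT)

    def is_fln_no_quantization(self) -> bool:   # new in this diff
        return self._is_single_quant_mode(ActivationQuantizationMode.FLN_NO_QUANT)

    def is_fln_quantization(self) -> bool:      # relocated, behavior unchanged
        return self._is_single_quant_mode(ActivationQuantizationMode.FLN_QUANT)

# With FLN_NO_QUANT distinct from NO_QUANT, "quantization not enabled" no
# longer implies NO_QUANT, so the two cases can be told apart:
node = NodeSketch([ActivationQuantizationMode.FLN_NO_QUANT])
assert not node.is_activation_quantization_enabled()
assert not node.is_no_quantization()
assert node.is_fln_no_quantization()
```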
@@ -21,7 +21,6 @@ from model_compression_toolkit.constants import FLOAT_BITWIDTH
  from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
      CandidateNodeQuantizationConfig
 
-
  def filter_nodes_candidates(graph: Graph):
      """
      Filters the graph's nodes candidates configuration list.
@@ -87,7 +86,7 @@ def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConf
      filtered_candidates = copy.deepcopy(node.candidates_quantization_cfg)
      final_candidates = copy.deepcopy(node.candidates_quantization_cfg)
 
-     if (node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr)) and not node.is_activation_quantization_enabled():
+     if (node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr)) and node.is_no_quantization():
          # If activation quantization is disabled and the node doesn't have a kernel or doesn't quantize the kernel,
          # but for some reason the node has multiple candidates then replace it with a single dummy candidate with
          # default bit-width values.
@@ -102,9 +101,10 @@ def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConf
 
          final_candidates = [single_dummy_candidate]
 
-     elif not node.is_activation_quantization_enabled():
+     elif node.is_no_quantization():
          # Remove candidates that have duplicated weights candidates for nodes with disabled activation quantization.
          # Replace the activation n_bits in the remaining configurations with a default value to prevent confusion.
+         # Set the config of the non-quantized FLN node to POWER_OF_TWO.
          seen_candidates = set()
          filtered_candidates = [candidate for candidate in filtered_candidates if
                                 candidate.weights_quantization_cfg not in seen_candidates
@@ -116,6 +116,14 @@ def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConf
 
          final_candidates = _filter_bit_method_dups(filtered_candidates, node.kernel_attr)
 
+     elif node.is_fln_no_quantization() or node.is_fln_quantization():
+         # Remove candidates with duplicated weights configurations for FLN nodes.
+         seen_candidates = set()
+         filtered_candidates = [candidate for candidate in filtered_candidates if
+                                candidate.weights_quantization_cfg not in seen_candidates
+                                and not seen_candidates.add(candidate.weights_quantization_cfg)]
+         final_candidates = _filter_bit_method_dups(filtered_candidates, node.kernel_attr)
+
      elif node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr):
          # TODO:
          # To allow MP on positional weights we need to modify this to consider all weights not only kernel.
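The new `elif` branch reuses the `seen_candidates` idiom from the branch above it: keep the first candidate per distinct weights config, preserving order. It works because `set.add` returns `None`, so `not seen.add(...)` is always true and only runs once the membership test has passed. In isolation, with a hypothetical `Cand` stand-in:

```python
# The seen_candidates idiom in isolation. Cand is a hypothetical stand-in;
# in MCT the deduplication key is candidate.weights_quantization_cfg.
from collections import namedtuple

Cand = namedtuple("Cand", ["weights_quantization_cfg", "activation_n_bits"])

def dedup_by_weights_cfg(candidates):
    seen = set()
    # set.add returns None (falsy), so the second clause always evaluates True
    # and fires only after the membership test has passed, recording the
    # config the first time it is seen.
    return [c for c in candidates
            if c.weights_quantization_cfg not in seen
            and not seen.add(c.weights_quantization_cfg)]

cands = [Cand(8, 8), Cand(8, 4), Cand(4, 8)]
assert dedup_by_weights_cfg(cands) == [Cand(8, 8), Cand(4, 8)]
```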
@@ -47,7 +47,7 @@ class ActivationQuantizationMode(Enum):
      FLN_QUANT = auto()
      PRESERVE_QUANT = auto()
      NO_QUANT = auto()
-
+     FLN_NO_QUANT = auto()
 
  class BaseNodeQuantizationConfig(object):
      """
@@ -155,7 +155,7 @@ def get_finalized_graph(initial_graph: Graph,
      ######################################
      fusing_info = FusingInfoGenerator(fqc.get_fusing_patterns()).generate_fusing_info(transformed_graph)
      transformed_graph.fusing_info = fusing_info
-     transformed_graph.disable_fused_nodes_activation_quantization()
+     transformed_graph.override_fused_node_activation_quantization_candidates()
 
      ######################################
      # Channel equalization