mct-nightly 2.4.0.20250626.617__py3-none-any.whl → 2.4.0.20250628.555__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/METADATA +1 -1
- {mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/RECORD +11 -11
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/base_graph.py +27 -10
- model_compression_toolkit/core/common/graph/base_node.py +17 -5
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +11 -3
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +1 -1
- model_compression_toolkit/core/graph_prep_runner.py +1 -1
- {mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/top_level.txt +0 -0
{mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/RECORD
RENAMED
@@ -1,5 +1,5 @@
|
|
1
|
-
mct_nightly-2.4.0.
|
2
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
mct_nightly-2.4.0.20250628.555.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
2
|
+
model_compression_toolkit/__init__.py,sha256=i97j--Rm-5M5jfoHTDDtJSsIJ2swWaHPeoSNq5P8zY4,1557
|
3
3
|
model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
|
4
4
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
5
5
|
model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
|
@@ -7,7 +7,7 @@ model_compression_toolkit/metadata.py,sha256=x_Bk4VpzILdsFax6--CZ3X18qUTP28sbF_A
|
|
7
7
|
model_compression_toolkit/verify_packages.py,sha256=l0neIRr8q_QwxmuiTI4vyCMDISDedK0EihjEQUe66tE,1319
|
8
8
|
model_compression_toolkit/core/__init__.py,sha256=HNverPpoqEyFKTa7iEdOqqY2P0Gq-7GMejNOi6ZPcQs,2042
|
9
9
|
model_compression_toolkit/core/analyzer.py,sha256=5P03LbkFy-mu31TMAiQoIKcsA1-DNz7cTzkGvRaXtbw,3505
|
10
|
-
model_compression_toolkit/core/graph_prep_runner.py,sha256=
|
10
|
+
model_compression_toolkit/core/graph_prep_runner.py,sha256=8K5JWOiCJxd54nlyYQS7g5UJl30sE7VWuscxForJ3ZA,10526
|
11
11
|
model_compression_toolkit/core/quantization_prep_runner.py,sha256=tz91E1BaNc_K0lvVZGB8oS6ya5N4Z5TJLG4pSM3hx30,6229
|
12
12
|
model_compression_toolkit/core/runner.py,sha256=pmRJeIqB0dKnyNsNSoaBgAkHv_RhQZylknWRFmnoStM,12423
|
13
13
|
model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
|
@@ -34,8 +34,8 @@ model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5
|
|
34
34
|
model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=Z-O03-DlM4XyllVg5FaQlYeIgk5UqoC8dSA6IlRODNI,22693
|
35
35
|
model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=yxxxuwrmQ4wLW-PlTu0MEW59LmNJEh1OWy9Li15YH-8,7520
|
36
36
|
model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
|
37
|
-
model_compression_toolkit/core/common/graph/base_graph.py,sha256=
|
38
|
-
model_compression_toolkit/core/common/graph/base_node.py,sha256=
|
37
|
+
model_compression_toolkit/core/common/graph/base_graph.py,sha256=_bv_XeENRoJJLvro6raa-Cn57awgnl70TvMM4ZNjkyA,41449
|
38
|
+
model_compression_toolkit/core/common/graph/base_node.py,sha256=wmhNY6bSM8920HAhl7hRZsMBKIi5h7g6DrytbEbuz18,35536
|
39
39
|
model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
|
40
40
|
model_compression_toolkit/core/common/graph/functional_node.py,sha256=wtX6CYzAwgzkt_gp3wByfYydIUZV4vjIHt9TU3Ps9nw,4731
|
41
41
|
model_compression_toolkit/core/common/graph/graph_matchers.py,sha256=CrDoHYq4iPaflgJWmoJ1K4ziLrRogJvFTVWg8P0UcDU,4744
|
@@ -106,8 +106,8 @@ model_compression_toolkit/core/common/quantization/bit_width_config.py,sha256=03
|
|
106
106
|
model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py,sha256=X9W_az8RQtOi4aFDLNzHxES_r6qs0kixB8OQ7seOVe8,4992
|
107
107
|
model_compression_toolkit/core/common/quantization/core_config.py,sha256=yxCzWqldcHoe8GGxrH0tp99bhrc5jDT7SgZftnMUUBE,2374
|
108
108
|
model_compression_toolkit/core/common/quantization/debug_config.py,sha256=uH45Uq3Tp9FIyMynex_WY2_y-Kv8LuPw2XXZydnpW5A,1649
|
109
|
-
model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=
|
110
|
-
model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=
|
109
|
+
model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=AFabyE57oL-XmI8jUhMA5p6ZVZGJW8dkI1BClN8Gu3w,7670
|
110
|
+
model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=Zy2Cz3vhRcjV_J7fqJf79J5dyBQHQy9AMGxD9RFuV9I,30250
|
111
111
|
model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=UkSVW7d1OF_Px9gAjsqqK65aYhIBFWaBO-_IH6_AFfg,4403
|
112
112
|
model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=HfBkSiRTOf9mNF-TNQHTCCs3xSg66F20no0O6vl5v1Y,2154
|
113
113
|
model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=7eG7dl1TcbdnHwgmvyjarxLs0o6Lw_9VAjXAm4rsiBk,3791
|
@@ -529,7 +529,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
|
|
529
529
|
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=Y0oBl8qPFsdNrK49XczwmVacInJcOPHslVnFBs-iTCc,3742
|
530
530
|
model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
|
531
531
|
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=n0HvWBzkBkUJZlS3WeynhpsRTps2qQkjlq7luliBHNU,9627
|
532
|
-
mct_nightly-2.4.0.
|
533
|
-
mct_nightly-2.4.0.
|
534
|
-
mct_nightly-2.4.0.
|
535
|
-
mct_nightly-2.4.0.
|
532
|
+
mct_nightly-2.4.0.20250628.555.dist-info/METADATA,sha256=FY4d6D9wbWtWWvn7sXzMdVLz_t4R3iVfbrYz_YQBPPA,25087
|
533
|
+
mct_nightly-2.4.0.20250628.555.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
534
|
+
mct_nightly-2.4.0.20250628.555.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
535
|
+
mct_nightly-2.4.0.20250628.555.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.4.0.
|
30
|
+
__version__ = "2.4.0.20250628.000555"
|
@@ -32,13 +32,13 @@ from model_compression_toolkit.core.common.collectors.statistics_collector impor
|
|
32
32
|
from model_compression_toolkit.core.common.collectors.statistics_collector import scale_statistics, shift_statistics
|
33
33
|
from model_compression_toolkit.core.common.pruning.pruning_section import PruningSection
|
34
34
|
from model_compression_toolkit.core.common.user_info import UserInformation
|
35
|
-
from model_compression_toolkit.core.common.quantization.node_quantization_config import
|
35
|
+
from model_compression_toolkit.core.common.quantization.node_quantization_config import \
|
36
|
+
NodeActivationQuantizationConfig, ActivationQuantizationMode
|
36
37
|
from model_compression_toolkit.logger import Logger
|
37
38
|
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import LayerFilterParams
|
38
39
|
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
|
39
40
|
FrameworkQuantizationCapabilities
|
40
41
|
|
41
|
-
|
42
42
|
def validate_graph_after_change(method: Callable) -> Callable:
|
43
43
|
"""
|
44
44
|
Decorator for graph-mutating methods. After the decorated method executes,
|
@@ -876,15 +876,32 @@ class Graph(nx.MultiDiGraph, GraphSearches):
|
|
876
876
|
|
877
877
|
return intermediate_nodes, next_node
|
878
878
|
|
879
|
-
def
|
879
|
+
def override_fused_node_activation_quantization_candidates(self):
|
880
880
|
"""
|
881
|
-
|
881
|
+
Override fused node activation quantization candidates for all nodes in fused operations,
|
882
882
|
except for the last node in each fused group.
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
883
|
+
Update the value of quantization_config with the value of the fused op quantization config from FusingInfo.
|
884
|
+
"""
|
885
|
+
from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import CandidateNodeQuantizationConfig
|
886
|
+
|
887
|
+
nodes_in_fln = self.fusing_info.get_inner_fln_nodes()
|
888
|
+
for node in nodes_in_fln:
|
889
|
+
fused_node_op_id = self.fusing_info.get_fused_op_id_for_node(node.name)
|
890
|
+
fusiong_op_quaitization_cfg = self.fusing_info.get_fused_op_quantization_config(fused_node_op_id)
|
891
|
+
org_candidate = node.candidates_quantization_cfg[0]
|
892
|
+
if fusiong_op_quaitization_cfg is not None and fusiong_op_quaitization_cfg.enable_activation_quantization:
|
893
|
+
# Set ActivationQuantizationMode to FLN_QUANT and update the value of quantization_config
|
894
|
+
activation_quantization_cfg = NodeActivationQuantizationConfig(qc=org_candidate,
|
895
|
+
op_cfg=fusiong_op_quaitization_cfg,
|
896
|
+
activation_quantization_fn=org_candidate.activation_quantization_cfg.activation_quantization_fn,
|
897
|
+
activation_quantization_params_fn=org_candidate.activation_quantization_cfg.activation_quantization_params_fn)
|
898
|
+
activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
|
899
|
+
for qc in node.candidates_quantization_cfg:
|
900
|
+
qc.activation_quantization_cfg = activation_quantization_cfg
|
901
|
+
else:
|
902
|
+
# Set ActivationQuantizationMode to FLN_NO_QUANT
|
903
|
+
for qc in node.candidates_quantization_cfg:
|
904
|
+
qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_NO_QUANT
|
888
905
|
|
889
906
|
def validate(self):
|
890
907
|
"""
|
@@ -908,4 +925,4 @@ class Graph(nx.MultiDiGraph, GraphSearches):
|
|
908
925
|
"""
|
909
926
|
Wrap networkx functions (that modifies the graph) with our validate decorator.
|
910
927
|
"""
|
911
|
-
return super().remove_edge(*args, **kwargs)
|
928
|
+
return super().remove_edge(*args, **kwargs)
|
@@ -216,19 +216,31 @@ class BaseNode:
|
|
216
216
|
Returns: Whether node activation quantization is enabled or not.
|
217
217
|
"""
|
218
218
|
return self._is_single_quant_mode(ActivationQuantizationMode.QUANT)
|
219
|
-
|
220
|
-
def
|
219
|
+
|
220
|
+
def is_fln_no_quantization(self) -> bool:
|
221
221
|
"""
|
222
|
-
Returns: Whether
|
222
|
+
Returns: Whether the node's activation quantization mode is FLN_NO_QUANT.
|
223
223
|
"""
|
224
|
-
return self._is_single_quant_mode(ActivationQuantizationMode.
|
225
|
-
|
224
|
+
return self._is_single_quant_mode(ActivationQuantizationMode.FLN_NO_QUANT)
|
225
|
+
|
226
226
|
def is_quantization_preserving(self) -> bool:
|
227
227
|
"""
|
228
228
|
Returns: Whether node activation quantization information is preserved from its inputs.
|
229
229
|
"""
|
230
230
|
return self._is_single_quant_mode(ActivationQuantizationMode.PRESERVE_QUANT)
|
231
231
|
|
232
|
+
def is_no_quantization(self) -> bool:
|
233
|
+
"""
|
234
|
+
Returns: Whether the node's activation quantization mode is NO_QUANT.
|
235
|
+
"""
|
236
|
+
return self._is_single_quant_mode(ActivationQuantizationMode.NO_QUANT)
|
237
|
+
|
238
|
+
def is_fln_quantization(self) -> bool:
|
239
|
+
"""
|
240
|
+
Returns: Whether the node's activation quantization mode is FLN_QUANT.
|
241
|
+
"""
|
242
|
+
return self._is_single_quant_mode(ActivationQuantizationMode.FLN_QUANT)
|
243
|
+
|
232
244
|
def is_weights_quantization_enabled(self, attr_name: str) -> bool:
|
233
245
|
"""
|
234
246
|
Checks whether a node's weights attribute quantization is enabled.
|
@@ -21,7 +21,6 @@ from model_compression_toolkit.constants import FLOAT_BITWIDTH
|
|
21
21
|
from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
|
22
22
|
CandidateNodeQuantizationConfig
|
23
23
|
|
24
|
-
|
25
24
|
def filter_nodes_candidates(graph: Graph):
|
26
25
|
"""
|
27
26
|
Filters the graph's nodes candidates configuration list.
|
@@ -87,7 +86,7 @@ def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConf
|
|
87
86
|
filtered_candidates = copy.deepcopy(node.candidates_quantization_cfg)
|
88
87
|
final_candidates = copy.deepcopy(node.candidates_quantization_cfg)
|
89
88
|
|
90
|
-
if (node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr)) and
|
89
|
+
if (node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr)) and node.is_no_quantization():
|
91
90
|
# If activation quantization is disabled and the node doesn't have a kernel or doesn't quantize the kernel,
|
92
91
|
# but for some reason the node has multiple candidates then replace it with a single dummy candidate with
|
93
92
|
# default bit-width values.
|
@@ -102,9 +101,10 @@ def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConf
|
|
102
101
|
|
103
102
|
final_candidates = [single_dummy_candidate]
|
104
103
|
|
105
|
-
elif
|
104
|
+
elif node.is_no_quantization():
|
106
105
|
# Remove candidates that have duplicated weights candidates for node with disabled activation quantization.
|
107
106
|
# Replacing the activation n_bits in the remained configurations with default value to prevent confusion.
|
107
|
+
# Set the config of the non-quantized FLN node to POWER_OF_TWO.
|
108
108
|
seen_candidates = set()
|
109
109
|
filtered_candidates = [candidate for candidate in filtered_candidates if
|
110
110
|
candidate.weights_quantization_cfg not in seen_candidates
|
@@ -116,6 +116,14 @@ def filter_node_candidates(node: BaseNode) -> List[CandidateNodeQuantizationConf
|
|
116
116
|
|
117
117
|
final_candidates = _filter_bit_method_dups(filtered_candidates, node.kernel_attr)
|
118
118
|
|
119
|
+
elif node.is_fln_no_quantization() or node.is_fln_quantization():
|
120
|
+
# Remove candidates that have duplicated weights candidates for FLN nodes (FLN_NO_QUANT or FLN_QUANT).
|
121
|
+
seen_candidates = set()
|
122
|
+
filtered_candidates = [candidate for candidate in filtered_candidates if
|
123
|
+
candidate.weights_quantization_cfg not in seen_candidates
|
124
|
+
and not seen_candidates.add(candidate.weights_quantization_cfg)]
|
125
|
+
final_candidates = _filter_bit_method_dups(filtered_candidates, node.kernel_attr)
|
126
|
+
|
119
127
|
elif node.kernel_attr is None or not node.is_weights_quantization_enabled(node.kernel_attr):
|
120
128
|
# TODO:
|
121
129
|
# To allow MP on positional weights we need to modify this to consider all weights not only kernel.
|
@@ -155,7 +155,7 @@ def get_finalized_graph(initial_graph: Graph,
|
|
155
155
|
######################################
|
156
156
|
fusing_info = FusingInfoGenerator(fqc.get_fusing_patterns()).generate_fusing_info(transformed_graph)
|
157
157
|
transformed_graph.fusing_info = fusing_info
|
158
|
-
transformed_graph.
|
158
|
+
transformed_graph.override_fused_node_activation_quantization_candidates()
|
159
159
|
|
160
160
|
######################################
|
161
161
|
# Channel equalization
|
File without changes
|
File without changes
|
{mct_nightly-2.4.0.20250626.617.dist-info → mct_nightly-2.4.0.20250628.555.dist-info}/top_level.txt
RENAMED
File without changes
|