mct-nightly 2.4.0.20250608.655__py3-none-any.whl → 2.4.0.20250609.615__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/METADATA +1 -1
- {mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/RECORD +11 -11
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/fusion/fusing_info.py +29 -11
- model_compression_toolkit/core/common/graph/base_graph.py +1 -1
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +1 -1
- model_compression_toolkit/core/common/model_collector.py +5 -3
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +12 -3
- {mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/top_level.txt +0 -0
{mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/RECORD
RENAMED
@@ -1,5 +1,5 @@
|
|
1
|
-
mct_nightly-2.4.0.
|
2
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
mct_nightly-2.4.0.20250609.615.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
2
|
+
model_compression_toolkit/__init__.py,sha256=iZK1iC0PgvlQ8g_69klbV_2fHfkRz-cbEc-fJLlJHQY,1557
|
3
3
|
model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
|
4
4
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
5
5
|
model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
|
@@ -16,7 +16,7 @@ model_compression_toolkit/core/common/framework_implementation.py,sha256=JQI_eoZ
|
|
16
16
|
model_compression_toolkit/core/common/framework_info.py,sha256=5tderHT-7Cd21QrRFIJj3hH_gAcnlivOzwZ5m1ldJOs,6526
|
17
17
|
model_compression_toolkit/core/common/memory_computation.py,sha256=ixoSpV5ZYZGyzhre3kQcvR2sNA8KBsPZ3lgbkDnw9Cs,1205
|
18
18
|
model_compression_toolkit/core/common/model_builder_mode.py,sha256=jll9-59OPaE3ug7Y9-lLyV99_FoNHxkGZMgcm0Vkpss,1324
|
19
|
-
model_compression_toolkit/core/common/model_collector.py,sha256=
|
19
|
+
model_compression_toolkit/core/common/model_collector.py,sha256=AbnJVjDlpmQZyj3Ba0XkK76zWh5dqcDFJdKXFTq25uw,13420
|
20
20
|
model_compression_toolkit/core/common/model_validation.py,sha256=LaG8wd6aZl0OJgieE3SeiVDEPxtk8IHq9-3wSnmWhY4,1214
|
21
21
|
model_compression_toolkit/core/common/node_prior_info.py,sha256=WXX_PrGVG9M9I_REG5ZzFBohwmV4yf356sZnrja_FLo,2832
|
22
22
|
model_compression_toolkit/core/common/similarity_analyzer.py,sha256=S3f6WgHyw62dGcxpX51FGKyfebe2zv9ABKbjtGyKRvY,9215
|
@@ -31,10 +31,10 @@ model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.p
|
|
31
31
|
model_compression_toolkit/core/common/collectors/statistics_collector.py,sha256=psijsQZefwjMDH8SU5E18n65HiGtQilPhKr1hhzZX-I,8268
|
32
32
|
model_compression_toolkit/core/common/collectors/weighted_histogram_collector.py,sha256=zp3dE7YTqWmkD5QWdRhsl9zD8W6Lr96G1Wjw1g2D3T0,4894
|
33
33
|
model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
34
|
-
model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=
|
34
|
+
model_compression_toolkit/core/common/fusion/fusing_info.py,sha256=Z-O03-DlM4XyllVg5FaQlYeIgk5UqoC8dSA6IlRODNI,22693
|
35
35
|
model_compression_toolkit/core/common/fusion/graph_fuser.py,sha256=yxxxuwrmQ4wLW-PlTu0MEW59LmNJEh1OWy9Li15YH-8,7520
|
36
36
|
model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
|
37
|
-
model_compression_toolkit/core/common/graph/base_graph.py,sha256=
|
37
|
+
model_compression_toolkit/core/common/graph/base_graph.py,sha256=NmGvxGg-UHsNEjz_mtwLhS0HMEM2-pbQPPax1QUw4o0,41378
|
38
38
|
model_compression_toolkit/core/common/graph/base_node.py,sha256=8GEqZ8VMtVvJZuiSUVdokCq6NkFosOssetcod21DwDM,33604
|
39
39
|
model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
|
40
40
|
model_compression_toolkit/core/common/graph/functional_node.py,sha256=GH5wStmw8SoAj5IdT_-ItN1Meo_P5NUTt_5bgJC4fak,3935
|
@@ -72,7 +72,7 @@ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_man
|
|
72
72
|
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=MY8df-c_kITEr_7hOctaxhdiq29hSTA0La9Qo0oTJJY,9678
|
73
73
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
74
74
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=PKkhc5q8pEPnNLXwo3U56EOCfYnPXIvPs0LlCGZOoKU,4426
|
75
|
-
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=
|
75
|
+
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=4Hl4JmrUAJjSNeT1efMTrM4UzHPr8RQeya0OY-6adWY,40304
|
76
76
|
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=ZY5yFIDzbaqIk0UzakDBObfsVevn4fydqAfAm4RCikY,4058
|
77
77
|
model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
|
78
78
|
model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=6Z6nQL9UH7B8dbcUR0cuCTEYFOKZAlvOb-SCk_cAZFA,6670
|
@@ -336,7 +336,7 @@ model_compression_toolkit/exporter/model_exporter/keras/mctq_keras_exporter.py,s
|
|
336
336
|
model_compression_toolkit/exporter/model_exporter/pytorch/__init__.py,sha256=uZ2RigbY9O2PJ0Il8wPpS_s7frgg9WUGd_SHeKGyl1A,699
|
337
337
|
model_compression_toolkit/exporter/model_exporter/pytorch/base_pytorch_exporter.py,sha256=9adOGG1nyviNzuL-1aJXyL0c_VQllSZWiG2gR-puywo,6420
|
338
338
|
model_compression_toolkit/exporter/model_exporter/pytorch/export_serialization_format.py,sha256=bPevy6OBqng41PqytBR55e6cBEuyrUS0H8dWX4zgjQ4,967
|
339
|
-
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=
|
339
|
+
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=1ix8j7rxc1giPjf2PZKwaaCb5pKo0obUvPmRtklmugY,10056
|
340
340
|
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py,sha256=ksWV2A-Njo-wAxQ_Ye2sLIZXBWJ_WNyjT7-qFFwvV2o,2897
|
341
341
|
model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py,sha256=7xuUrHPMiifn23sWfeiqR9wkYhm8EweDRL_vF-JSxMY,6642
|
342
342
|
model_compression_toolkit/exporter/model_wrapper/__init__.py,sha256=7CF2zvpTrIEm8qnbuHnLZyTZkwBBxV24V8QA0oxGbh0,1187
|
@@ -529,7 +529,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
|
|
529
529
|
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
|
530
530
|
model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
|
531
531
|
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
|
532
|
-
mct_nightly-2.4.0.
|
533
|
-
mct_nightly-2.4.0.
|
534
|
-
mct_nightly-2.4.0.
|
535
|
-
mct_nightly-2.4.0.
|
532
|
+
mct_nightly-2.4.0.20250609.615.dist-info/METADATA,sha256=QQwkgGOWQt_9w0Xc9uRQrcvJg370QpweeRXfkzForV0,25087
|
533
|
+
mct_nightly-2.4.0.20250609.615.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
534
|
+
mct_nightly-2.4.0.20250609.615.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
535
|
+
mct_nightly-2.4.0.20250609.615.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.4.0.
|
30
|
+
__version__ = "2.4.0.20250609.000615"
|
@@ -39,13 +39,13 @@ class FusingInfo:
|
|
39
39
|
- 'fusing_patterns': The patterns to generate the fused operators from.
|
40
40
|
- 'manual_fused_ops': List of sequence of node names to handle as fused ops (even if they are not part of the fusing patterns).
|
41
41
|
- `fusing_data`: A dictionary mapping fused operation IDs to lists of nodes that belong to that operation.
|
42
|
-
- `
|
42
|
+
- `node_name_to_fused_op_id`: A dictionary mapping each node name to the ID of the fused operation it belongs to.
|
43
43
|
|
44
44
|
"""
|
45
45
|
fusing_patterns: List[list[any]] = None
|
46
46
|
manual_fused_ops: List[List[str]] = None
|
47
47
|
fusing_data: Dict[str, Tuple['BaseNode']] = field(default_factory=dict)
|
48
|
-
|
48
|
+
node_name_to_fused_op_id: Dict[str, str] = field(init=False, default_factory=dict)
|
49
49
|
fused_op_id_to_quant_config: Dict[str, OpQuantizationConfig] = field(default_factory=dict)
|
50
50
|
|
51
51
|
def __post_init__(self):
|
@@ -64,10 +64,10 @@ class FusingInfo:
|
|
64
64
|
"""
|
65
65
|
Init the node-to-fused-node mapping based on the initial fusing data.
|
66
66
|
"""
|
67
|
-
self.
|
67
|
+
self.node_name_to_fused_op_id.clear()
|
68
68
|
for op_id, nodes in self.fusing_data.items():
|
69
69
|
for node in nodes:
|
70
|
-
self.
|
70
|
+
self.node_name_to_fused_op_id[node.name] = op_id
|
71
71
|
|
72
72
|
def get_manual_nodes_to_fuse(self) -> List[List[str]]:
|
73
73
|
"""
|
@@ -115,7 +115,7 @@ class FusingInfo:
|
|
115
115
|
self.fusing_data[op_id] = nodes
|
116
116
|
# Update the mapping for these nodes
|
117
117
|
for node in nodes:
|
118
|
-
self.
|
118
|
+
self.node_name_to_fused_op_id[node.name] = op_id
|
119
119
|
|
120
120
|
# Update the quantization config mapping for this operation
|
121
121
|
if self.fusing_patterns is not None:
|
@@ -152,7 +152,7 @@ class FusingInfo:
|
|
152
152
|
self._manual_fused_ops.remove(node_names)
|
153
153
|
|
154
154
|
for node in nodes:
|
155
|
-
self.
|
155
|
+
self.node_name_to_fused_op_id.pop(node.name, None)
|
156
156
|
del self.fusing_data[op_id]
|
157
157
|
self.fused_op_id_to_quant_config.pop(op_id, None)
|
158
158
|
|
@@ -166,7 +166,7 @@ class FusingInfo:
|
|
166
166
|
Returns:
|
167
167
|
The name of the fused node containing this node, or None if not fused.
|
168
168
|
"""
|
169
|
-
return self.
|
169
|
+
return self.node_name_to_fused_op_id.get(node_name)
|
170
170
|
|
171
171
|
def get_node_to_fused_node_map(self) -> Dict[str, str]:
|
172
172
|
"""
|
@@ -175,7 +175,7 @@ class FusingInfo:
|
|
175
175
|
Returns:
|
176
176
|
A dictionary mapping each original node name to its fused node name.
|
177
177
|
"""
|
178
|
-
return self.
|
178
|
+
return self.node_name_to_fused_op_id.copy()
|
179
179
|
|
180
180
|
def get_fusing_quantization_config_map(self) -> Dict[str, OpQuantizationConfig]:
|
181
181
|
"""
|
@@ -198,10 +198,12 @@ class FusingInfo:
|
|
198
198
|
"""
|
199
199
|
return self.fusing_data.get(op_id)
|
200
200
|
|
201
|
-
def
|
201
|
+
def get_inner_fln_nodes(self) -> List['BaseNode']:
|
202
202
|
"""
|
203
|
-
Returns a list of the nodes that
|
203
|
+
Returns a list of the nodes that are part of an FLN but are not its last node.
|
204
204
|
"""
|
205
|
+
# TODO: the order of the nodes is not guaranteed when returned as dict from get_all_fused_operations -
|
206
|
+
# then, removing the last one can cause issues
|
205
207
|
return [node for nodes in self.get_all_fused_operations().values() for node in nodes[:-1]]
|
206
208
|
|
207
209
|
def get_fused_op_quantization_config(self, op_id: str) -> OpQuantizationConfig:
|
@@ -228,6 +230,22 @@ class FusingInfo:
|
|
228
230
|
"""
|
229
231
|
return any(node in nodes for nodes in self.fusing_data.values())
|
230
232
|
|
233
|
+
def is_quantized_node_in_fln(self, node: 'BaseNode') -> bool:
|
234
|
+
"""
|
235
|
+
Check whether a node is inside an FLN and should be quantized.
|
236
|
+
|
237
|
+
Args:
|
238
|
+
node (BaseNode): The node to check.
|
239
|
+
|
240
|
+
Returns:
|
241
|
+
bool: True if the node is in any fused operation and should be quantized.
|
242
|
+
"""
|
243
|
+
if self.is_node_in_fused_op(node):
|
244
|
+
node_q_cfg = self.fused_op_id_to_quant_config[self.node_name_to_fused_op_id[node.name]]
|
245
|
+
return node_q_cfg is not None and node_q_cfg.enable_activation_quantization
|
246
|
+
|
247
|
+
return False
|
248
|
+
|
231
249
|
def get_all_fused_operations(self) -> Dict[str, Tuple['BaseNode']]:
|
232
250
|
"""
|
233
251
|
Retrieve fused information.
|
@@ -340,7 +358,7 @@ class FusingInfo:
|
|
340
358
|
for op_id, nodes in self.fusing_data.items()
|
341
359
|
)
|
342
360
|
mapping_repr = ", ".join(
|
343
|
-
f"{node} -> {op_id}" for node, op_id in self.
|
361
|
+
f"{node} -> {op_id}" for node, op_id in self.node_name_to_fused_op_id.items()
|
344
362
|
)
|
345
363
|
return (
|
346
364
|
f"FusingInfo(\n"
|
@@ -908,7 +908,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
|
|
908
908
|
Disable activation quantization for all nodes in fused operations,
|
909
909
|
except for the last node in each fused group.
|
910
910
|
"""
|
911
|
-
nodes_to_disable = self.fusing_info.
|
911
|
+
nodes_to_disable = self.fusing_info.get_inner_fln_nodes()
|
912
912
|
for node in nodes_to_disable:
|
913
913
|
for qc in node.candidates_quantization_cfg:
|
914
914
|
qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
|
@@ -677,7 +677,7 @@ class ResourceUtilizationCalculator:
|
|
677
677
|
elif target_criterion == TargetInclusionCriterion.AnyQuantizedNonFused:
|
678
678
|
nodes = [n for n in nodes if n.is_activation_quantization_enabled() or n.is_quantization_preserving()]
|
679
679
|
# remove fused nodes (due to SNC, where the non-linear is quantized, even though it should not be quantized)
|
680
|
-
nodes = [n for n in nodes if n not in self.graph.fusing_info.
|
680
|
+
nodes = [n for n in nodes if n not in self.graph.fusing_info.get_inner_fln_nodes()]
|
681
681
|
elif target_criterion == TargetInclusionCriterion.QNonConfigurable:
|
682
682
|
nodes = [n for n in nodes if n.is_activation_quantization_enabled() and not n.has_configurable_activation()]
|
683
683
|
elif target_criterion != TargetInclusionCriterion.Any: # pragma: no cover
|
@@ -30,7 +30,8 @@ from model_compression_toolkit.core.common.collectors.statistics_collector impor
|
|
30
30
|
|
31
31
|
|
32
32
|
def create_stats_collector_for_node(node: common.BaseNode,
|
33
|
-
fw_info: FrameworkInfo
|
33
|
+
fw_info: FrameworkInfo,
|
34
|
+
quant_node_in_fln: bool) -> BaseStatsCollector:
|
34
35
|
"""
|
35
36
|
Gets a node and a groups list and create and return a statistics collector for a node
|
36
37
|
according to whether its statistics should be collected and the prior information we
|
@@ -44,7 +45,7 @@ def create_stats_collector_for_node(node: common.BaseNode,
|
|
44
45
|
Statistics collector for statistics collection for the node.
|
45
46
|
"""
|
46
47
|
|
47
|
-
if node.is_activation_quantization_enabled() or
|
48
|
+
if node.is_activation_quantization_enabled() or quant_node_in_fln:
|
48
49
|
min_output = getattr(node.prior_info, 'min_output', None)
|
49
50
|
max_output = getattr(node.prior_info, 'max_output', None)
|
50
51
|
stats_collector = common.StatsCollector(out_channel_axis=fw_info.out_channel_axis_mapping.get(node.type),
|
@@ -160,7 +161,8 @@ class ModelCollector:
|
|
160
161
|
|
161
162
|
# Assign statistics collectors to nodes
|
162
163
|
for n in graph.get_topo_sorted_nodes():
|
163
|
-
|
164
|
+
quant_node_in_fln = n.is_fln_quantization() and graph.fusing_info.is_quantized_node_in_fln(n)
|
165
|
+
sc = create_stats_collector_for_node(n, fw_info=fw_info, quant_node_in_fln=quant_node_in_fln) # Get static collector for the node
|
164
166
|
# If we use bias correction, and the node has kernel weights to quantize, we need to make sure
|
165
167
|
# its previous nodes' tensors are consistent with this node.
|
166
168
|
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
|
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py
CHANGED
@@ -78,9 +78,18 @@ if FOUND_ONNX:
|
|
78
78
|
act_holder_list = [n for n, m in self.model.named_modules()
|
79
79
|
if isinstance(m, PytorchActivationQuantizationHolder) and
|
80
80
|
m.activation_holder_quantizer.num_bits > 8]
|
81
|
-
for act_holder in act_holder_list:
|
82
|
-
|
83
|
-
|
81
|
+
for act_holder in act_holder_list: # pragma: no cover
|
82
|
+
obj = self.model
|
83
|
+
attrs = act_holder.split(".")
|
84
|
+
for a in attrs[:-1]:
|
85
|
+
obj = getattr(obj, a)
|
86
|
+
if hasattr(obj, attrs[-1]):
|
87
|
+
delattr(obj, attrs[-1])
|
88
|
+
setattr(obj, attrs[-1], torch.nn.Identity())
|
89
|
+
else:
|
90
|
+
Logger.info(f"During removal of activation quantization of a quantizer (with bits > 8) in ONNX FQ "
|
91
|
+
f"export, deletion of activation holder '{act_holder}' failed — could not locate one or"
|
92
|
+
f"more intermediate attributes in the path.")
|
84
93
|
|
85
94
|
for layer in self.model.children():
|
86
95
|
self.is_layer_exportable_fn(layer)
|
File without changes
|
File without changes
|
{mct_nightly-2.4.0.20250608.655.dist-info → mct_nightly-2.4.0.20250609.615.dist-info}/top_level.txt
RENAMED
File without changes
|