mct-nightly 2.2.0.20250120.516__py3-none-any.whl → 2.3.0.20250122.511__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/RECORD +17 -17
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +3 -2
- model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py +2 -1
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +0 -1
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +6 -0
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +3 -2
- model_compression_toolkit/core/common/quantization/quantization_config.py +0 -3
- model_compression_toolkit/core/runner.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +1 -0
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +0 -3
- model_compression_toolkit/metadata.py +1 -0
- model_compression_toolkit/qat/pytorch/quantization_facade.py +9 -12
- {mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: mct-nightly
-Version: 2.2.0.20250120.516
+Version: 2.3.0.20250122.511
 Summary: A Model Compression Toolkit for neural networks
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: Apache Software License
{mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/RECORD
RENAMED
@@ -1,14 +1,14 @@
-model_compression_toolkit/__init__.py,sha256=
+model_compression_toolkit/__init__.py,sha256=W-VlbEGVB70K-VEhpBiGAs-C2rkPbSEdZ7j7VtmYi2w,1557
 model_compression_toolkit/constants.py,sha256=i_R6uXBfO1ph_X6DNJych2x59SUojfJbn7dNjs_mZnc,3846
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
-model_compression_toolkit/metadata.py,sha256=
+model_compression_toolkit/metadata.py,sha256=x_Bk4VpzILdsFax6--CZ3X18qUTP28sbF_AhoQW8dNc,4003
 model_compression_toolkit/verify_packages.py,sha256=TlS-K1EP-QsghqWUW7SDPkAJiUf7ryw4tvhFDe6rCUk,1405
 model_compression_toolkit/core/__init__.py,sha256=8a0wUNBKwTdJGDk_Ho6WQAXjGuCqQZG1FUxxJlAV8L8,2096
 model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
 model_compression_toolkit/core/graph_prep_runner.py,sha256=CVTjBaci8F6EP3IKDnRMfxkP-Sv8qY8GpkGt6FyII2U,11376
 model_compression_toolkit/core/quantization_prep_runner.py,sha256=OtL6g2rTC5mfdKrkzm47EPPW-voGGVYMYxpy2_sfu1U,6547
-model_compression_toolkit/core/runner.py,sha256=
+model_compression_toolkit/core/runner.py,sha256=T3AMorIqKSIQQAdN9XKrqmFBmdXr6H5tVBn5RDgg8as,13876
 model_compression_toolkit/core/common/__init__.py,sha256=Wh127PbXcETZX_d1PQqZ71ETK3J9XO5A-HpadGUbj6o,1447
 model_compression_toolkit/core/common/base_substitutions.py,sha256=xDFSmVVs_iFSZfajytI0cuQaNRNcwHX3uqOoHgVUvxQ,1666
 model_compression_toolkit/core/common/framework_implementation.py,sha256=IkMydCj6voau7dwkYLYA_Ka_EFUKP3GKQdpYN6b1fgc,22163
@@ -41,9 +41,9 @@ model_compression_toolkit/core/common/graph/graph_searches.py,sha256=2oKuW6L8hP-
 model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py,sha256=3el-A7j1oyoo1_9zq3faQp7IeRsFXFCvnrb3zZFXpU0,9803
 model_compression_toolkit/core/common/graph/memory_graph/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
 model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py,sha256=X6FK3C3y8ixFRPjC_wm3ClloCX8_06SOdA1TRi7o_LA,3800
-model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256=
+model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py,sha256=sUGp9GnKBI5NL7Y6d9pCyAL6Nv_3Htf2wInUtCtNMpU,3497
 model_compression_toolkit/core/common/graph/memory_graph/cut.py,sha256=7Dfq4TVJIrnencHLJqjhxYKhY7ooUo_ml33WH2IIAgc,2576
-model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256
+model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=E8xKMUxtEF0GjztUk-3CmMtivPPBcADnZTUaSN24o6A,17816
 model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=ISD2BvJWj5mB91jrFjG8VQb0oOoLBoita_thCZWzCPI,4238
 model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=FCzK4HmX4lWI4qGoGv94wpGv7o6_f5wPBfeBPMerZ18,7752
 model_compression_toolkit/core/common/hessian/__init__.py,sha256=E7LK3K_1AwMCQokanNc1JODMwUKNOKmwXQiGQ7GO10I,1033
@@ -70,11 +70,11 @@ model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_fac
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=fe8R1ZdllwL_YQoHgvzTBjoI3GJo4bEVk89I3zEVr14,32463
 model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=gsigifJ-ykWNafF4t7UMEC_-nd6YPERAk1_z0kT-Y88,27172
 model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=P8QtKgFXtt5b2RoubzI5OGlCfbEfZsAirjyrkFzK26A,2846
-model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=8oAFJc_KC3z5ClI-zo4KC40kKGscyixUc5oYP4j4cMo,8019
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=T5yVr7lay-6QLuTDBZNI1Ufj02EMBWuY_yHjC8eHx5I,3998
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=
-model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py,sha256=qdnkkviZZFYqFuRY7o0US_ihH1Iq-X-eG7OwiTVsRBU,34937
+model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=J7gqUGs4ITo4ufl84A5vACxm670LG6RhQyXkejfpbn8,8834
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
 model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=uhC0az5OVSfeYexcasoy0cT8ZOonFKIedk_1U-ZPLhA,17171
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
@@ -105,7 +105,7 @@ model_compression_toolkit/core/common/quantization/core_config.py,sha256=yxCzWql
 model_compression_toolkit/core/common/quantization/debug_config.py,sha256=zJP2W9apUPX9RstpPWWK71wr9xJsg7j-s7lGV4_bQdc,1510
 model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py,sha256=IHVX-Gdekru4xLuDTgcsp_JCnRtuVWnbYsDBQuSXTKc,7079
 model_compression_toolkit/core/common/quantization/node_quantization_config.py,sha256=HctuRvQsT6Dx9A3lLiLEijghPftM-iABcjmDShlPAn4,26527
-model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=
+model_compression_toolkit/core/common/quantization/quantization_config.py,sha256=UkSVW7d1OF_Px9gAjsqqK65aYhIBFWaBO-_IH6_AFfg,4403
 model_compression_toolkit/core/common/quantization/quantization_fn_selection.py,sha256=HfBkSiRTOf9mNF-TNQHTCCs3xSg66F20no0O6vl5v1Y,2154
 model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py,sha256=7eG7dl1TcbdnHwgmvyjarxLs0o6Lw_9VAjXAm4rsiBk,3791
 model_compression_toolkit/core/common/quantization/quantize_graph_weights.py,sha256=N005MSvx8UypVpa7XrxNrB2G732n2wHj3RmLyjTgd3I,2728
@@ -379,11 +379,11 @@ model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=_07Zx_43bnNokwR5S8phI
 model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
 model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=WtehnyiYXdUXf8-uNpV0mdsalF7YF7eKnL7tcFrzZoE,19549
 model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=4mVM-VvnBaA64ACVdOe6wTGHdMSa2UTLIUe7nACLcdo,4008
-model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=
+model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=ciBrdTZqTNFw-5RleEAM6o5GJq5zNhym2GmAmf6U0_I,17179
 model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
 model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=fKg-PNOhGBiL-4eySS9Fyw0GkA76Pq8jT_HbJuJ8iZU,4143
 model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
-model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=
+model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=dMZ4Aavw8r32CRSh53c5z27_Im7ivKMNyAi9ay7mSKg,4474
 model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
 model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=f7B95Bx-MX-HKheqAUn1GG8cVHFI2ldFReXrUPwk2tY,3002
 model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=xzTK2apHSSO6MDygDyhrlGgwoIyCsiQqgqLDIX93aao,12291
@@ -417,7 +417,7 @@ model_compression_toolkit/qat/keras/quantizer/ste_rounding/__init__.py,sha256=cc
 model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=lXeMPI-n24jbZDGrtOs5eQZ14QvmhFd0e7Y1_QRQxw0,8214
 model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py,sha256=ZdZwMwLa1Ws2eo3DiQYYTvPS1JfiswZL1xlQPtRnIgE,7067
 model_compression_toolkit/qat/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/qat/pytorch/quantization_facade.py,sha256=
+model_compression_toolkit/qat/pytorch/quantization_facade.py,sha256=M4u2V12y_B_EFoT52iky4J3ebYCkT2cPu7TUKTir0fQ,14082
 model_compression_toolkit/qat/pytorch/quantizer/__init__.py,sha256=xYa4C8pr9cG1f3mQQcBXO_u3IdJN-zl7leZxuXDs86w,1003
 model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_weight_quantizer.py,sha256=gjzrnBAZr5c_OrDpSjxpQYa_jKImv7ll52cng07_2oE,1813
 model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py,sha256=lM10cGUkkTDtRyLLdWj5Rk0cgvcxp0uaCseyvrnk_Vg,5752
@@ -523,8 +523,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.
-mct_nightly-2.
-mct_nightly-2.
-mct_nightly-2.
-mct_nightly-2.
+mct_nightly-2.3.0.20250122.511.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.3.0.20250122.511.dist-info/METADATA,sha256=XzRqjnA-eL0k76UFKe0lPnpFegLxpNE4EaADNqMNaig,26601
+mct_nightly-2.3.0.20250122.511.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+mct_nightly-2.3.0.20250122.511.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.3.0.20250122.511.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
 
-__version__ = "2.
+__version__ = "2.3.0.20250122.000511"
model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py
CHANGED
@@ -53,9 +53,10 @@ def compute_graph_max_cut(memory_graph: MemoryGraph,
         try:
             schedule, max_cut_size, cuts = max_cut_astar.solve(estimate=estimate, iter_limit=astar_n_iter,
                                                                time_limit=None if it == 0 else 300)
-        except TimeoutError:
+        except TimeoutError:  # pragma: no cover
+            # TODO: add test for this.
             if last_result[0] is None:
-                Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.")
+                Logger.critical(f"Max-cut solver stopped on timeout in iteration {it} before finding a solution.")
             else:
                 Logger.warning(f"Max-cut solver stopped on timeout in iteration {it}.")
     return last_result
model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py
CHANGED
@@ -151,7 +151,8 @@ class MaxCutAstar:
         t1 = time()
         while expansion_count < iter_limit and len(open_list) > 0:
             if time_limit is not None and time() - t1 > time_limit:
-                raise TimeoutError
+                # TODO: add test for this.
+                raise TimeoutError  # pragma: no cover
             # Choose next node to expand
             next_cut = self._get_cut_to_expand(open_list, costs, routes, estimate)
 
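The two hunks above wire a simple time-budget pattern into the max-cut scheduler: the A* loop raises `TimeoutError` once its deadline passes, and the caller treats a timeout as fatal only when no earlier result exists (the `# pragma: no cover` markers exclude the still-untested branches from coverage). Below is a self-contained sketch of that pattern; the names (`search_with_deadline`, `solve_with_budget`) are illustrative, not MCT APIs.

```python
# Hypothetical sketch of the timeout pattern above; names are illustrative,
# not MCT APIs. The first attempt runs unbounded, retries get a fixed budget.
from time import time

def search_with_deadline(step, deadline_sec):
    """Run step() until it returns a result; raise TimeoutError past the deadline."""
    t1 = time()
    result = None
    while result is None:
        if deadline_sec is not None and time() - t1 > deadline_sec:
            raise TimeoutError
        result = step()
    return result

def solve_with_budget(step, n_attempts=2, budget_sec=300):
    last_result = None
    for it in range(n_attempts):
        try:
            last_result = search_with_deadline(step, None if it == 0 else budget_sec)
        except TimeoutError:
            if last_result is None:
                # No fallback solution exists yet, so a timeout here is fatal.
                raise RuntimeError(f"Solver timed out in attempt {it} before finding a solution.")
    return last_result
```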
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
CHANGED
@@ -170,7 +170,6 @@ class ResourceUtilizationCalculator:
         w_total, *_ = self.compute_weights_utilization(target_criterion, bitwidth_mode, w_qcs)
 
         if {RUTarget.ACTIVATION, RUTarget.TOTAL}.intersection(ru_targets):
-            Logger.warning("Using an experimental feature max-cut for activation memory utilization estimation.")
             a_total = self.compute_activations_utilization(target_criterion, bitwidth_mode, act_qcs)
 
         ru = ResourceUtilization()
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
CHANGED
@@ -104,6 +104,12 @@ def requires_mixed_precision(in_model: Any,
 
     Returns: A boolean indicating if mixed precision is needed.
     """
+    # Any target resource utilization other than weights will always require MP calculation.
+    if target_resource_utilization.activation_restricted() or \
+            target_resource_utilization.total_mem_restricted() or \
+            target_resource_utilization.bops_restricted():
+        return True
+
     core_config = _create_core_config_for_ru(core_config)
 
     transformed_graph = graph_preparation_runner(in_model,
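The early return added above lets `requires_mixed_precision` answer immediately for any non-weights restriction, skipping the graph preparation that follows. A hedged sketch of the decision rule, assuming only the restriction predicates shown in the hunk (the weights-only comparison is stubbed out):

```python
# Sketch of the decision rule; `ru` is assumed to expose the restriction
# predicates used in the hunk. The weights-only path is deliberately stubbed.
def needs_mixed_precision(ru) -> bool:
    # Any restriction other than weights always requires a mixed-precision search.
    if ru.activation_restricted() or ru.total_mem_restricted() or ru.bops_restricted():
        return True
    # A weights-only restriction still requires comparing the max-bitwidth
    # graph's weight memory against the target; that computation is elided here.
    return False
```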
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py
CHANGED
@@ -68,7 +68,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
         node_candidates = current_node.candidates_quantization_cfg
 
         # only weights kernel attribute is quantized with weights mixed precision
-        kernel_attr = search_manager.fw_info.get_kernel_op_attributes(current_node)
+        kernel_attr = search_manager.fw_info.get_kernel_op_attributes(current_node.type)
         kernel_attr = None if kernel_attr is None else kernel_attr[0]
         valid_candidates = _get_valid_candidates_indices(node_candidates, new_solution[node_idx], kernel_attr)
 
@@ -139,8 +139,9 @@ def _get_valid_candidates_indices(node_candidates: List[CandidateNodeQuantizatio
     activation_num_bits = current_candidate.activation_quantization_cfg.activation_n_bits
 
     # Filter candidates that have higher bit-width for both weights and activations (except for the current index).
+    # TODO: activation bits comparison: should be >= if ACTIVATION or TOTAL ru is used. else should be ==.
     return [i for i, c in enumerate(node_candidates) if
-            c.activation_quantization_cfg.activation_n_bits
+            c.activation_quantization_cfg.activation_n_bits == activation_num_bits
             and c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits >= weights_num_bits
             and not (c.activation_quantization_cfg.activation_n_bits == activation_num_bits
                      and c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits == weights_num_bits)]
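After this fix, `_get_valid_candidates_indices` only keeps candidates with the same activation width and an equal-or-higher kernel weight width, excluding the current configuration itself. A toy illustration of the same filter over `(activation_bits, weights_bits)` pairs (hypothetical helper, not package code):

```python
# Toy illustration of the candidate filter; tuples are (activation_bits, weights_bits).
def valid_candidate_indices(candidates, cur):
    a_cur, w_cur = candidates[cur]
    return [i for i, (a, w) in enumerate(candidates)
            if a == a_cur and w >= w_cur and not (a == a_cur and w == w_cur)]

# The 8-bit-activation candidates with weights >= 2 bits survive; the current
# candidate (index 2) and the 4-bit-activation candidate are filtered out.
print(valid_candidate_indices([(8, 8), (8, 4), (8, 2), (4, 8)], cur=2))  # -> [0, 1]
```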
model_compression_toolkit/core/common/quantization/quantization_config.py
CHANGED
@@ -78,9 +78,6 @@ class QuantizationConfig:
     >>> qc = mct.core.QuantizationConfig(activation_error_method=mct.core.QuantizationErrorMethod.NOCLIPPING, weights_error_method=mct.core.QuantizationErrorMethod.MSE, relu_bound_to_power_of_2=True, weights_bias_correction=True)
 
 
-    The QuantizationConfig instance can then be used in the quantization workflow,
-    such as with Keras in the function: :func:`~model_compression_toolkit.ptq.keras_post_training_quantization`.
-
     """
 
     activation_error_method: QuantizationErrorMethod = QuantizationErrorMethod.MSE
model_compression_toolkit/core/runner.py
CHANGED
@@ -90,9 +90,11 @@ def core_runner(in_model: Any,
 
     # Checking whether to run mixed precision quantization
     if target_resource_utilization is not None and target_resource_utilization.is_any_restricted():
-        if core_config.mixed_precision_config is None:
+        if core_config.mixed_precision_config is None:  # pragma: no cover
             Logger.critical("Provided an initialized target_resource_utilization, that means that mixed precision quantization is "
                             "enabled, but the provided MixedPrecisionQuantizationConfig is None.")
+        if target_resource_utilization.activation_restricted() or target_resource_utilization.total_mem_restricted():
+            Logger.warning("Using an experimental feature max-cut for activation memory utilization estimation.")
         # Determine whether to use mixed precision or single precision based on target_resource_utilization.
         if requires_mixed_precision(in_model,
                                     target_resource_utilization,
model_compression_toolkit/gptq/pytorch/quantization_facade.py
CHANGED
@@ -77,6 +77,7 @@ if FOUND_TORCH:
             regularization_factor (float): A floating point number that defines the regularization factor.
             hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
             use_hessian_sample_attention (bool): whether to use Sample-Layer Attention score for weighted loss.
+            gradual_activation_quantization (bool, GradualActivationQuantizationConfig): If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.
 
         returns:
             a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
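A hedged usage sketch for the newly documented parameter. `get_pytorch_gptq_config` and `n_epochs` follow MCT's public docs, but exact signatures can drift between nightly builds:

```python
# Hedged example: enable gradual activation quantization in a GPTQ config.
# API names follow MCT's public docs; verify against the installed nightly.
import model_compression_toolkit as mct

# True enables GradualActivationQuantization with default settings; passing a
# GradualActivationQuantizationConfig instead customizes the schedule.
gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=5,
                                               gradual_activation_quantization=True)
```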
model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py
CHANGED
@@ -16,12 +16,9 @@ from typing import List, Dict, Tuple
 
 from model_compression_toolkit.gptq import GradientPTQConfig
 from model_compression_toolkit.core import common
-from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.node_to_quantizer import \
-    get_activation_inferable_quantizer_kwargs
 from model_compression_toolkit.gptq.pytorch.quantizer.base_pytorch_gptq_quantizer import \
     BasePytorchGPTQTrainableQuantizer
 from mct_quantizers import QuantizationTarget
-from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
 from mct_quantizers.pytorch.quantizers import BasePyTorchInferableQuantizer
 
 from model_compression_toolkit.logger import Logger
model_compression_toolkit/metadata.py
CHANGED
@@ -57,6 +57,7 @@ def get_versions_dict(fqc) -> Dict:
     tpc_minor_version: str = f'{fqc.tpc.tpc_minor_version}'
     tpc_patch_version: str = f'{fqc.tpc.tpc_patch_version}'
     tpc_platform_type: str = f'{fqc.tpc.tpc_platform_type}'
+    tpc_name: str = f'{fqc.tpc.name}'
     tpc_schema: str = f'{fqc.tpc.SCHEMA_VERSION}'
 
     return asdict(TPCVersions(mct_version))
model_compression_toolkit/qat/pytorch/quantization_facade.py
CHANGED
@@ -110,7 +110,6 @@ if FOUND_TORCH:
             User information that may be needed to handle the quantized model.
 
         Examples:
-
         Import MCT:
 
         >>> import model_compression_toolkit as mct
@@ -120,21 +119,19 @@ if FOUND_TORCH:
         >>> from torchvision.models import mobilenet_v2
         >>> model = mobilenet_v2(pretrained=True)
 
-
-        In this example a random dataset of 10 batches each containing 4 images is used.
+        Create a random dataset generator, for required number of calibration iterations (num_calibration_batches). In this example, a random dataset of 10 batches each containing 4 images is used:
 
-
-
-
-
-
+        >>> import numpy as np
+        >>> num_calibration_batches = 10
+        >>> def repr_datagen():
+        >>>     for _ in range(num_calibration_batches):
+        >>>         yield [np.random.random((4, 3, 224, 224))]
 
         Create a MCT core config, containing the quantization configuration:
 
         >>> config = mct.core.CoreConfig()
 
-        Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a
-        quantized model. Now the model contains quantizer wrappers for fine tunning the weights:
+        Pass the model, the representative dataset generator, the configuration and the target resource utilization to get a quantized model. Now the model contains quantizer wrappers for fine tunning the weights:
 
         >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)
 
@@ -149,8 +146,8 @@ if FOUND_TORCH:
         if core_config.is_mixed_precision_enabled:
             if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.critical("Given quantization config to mixed-precision facade is not of type "
-
-
+                                "MixedPrecisionQuantizationConfig. Please use pytorch_post_training_quantization API,"
+                                "or pass a valid mixed precision configuration.")
 
         tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
         fw_impl = PytorchImplementation()
{mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/LICENSE.md
RENAMED
File without changes
{mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/WHEEL
RENAMED
File without changes
{mct_nightly-2.2.0.20250120.516.dist-info → mct_nightly-2.3.0.20250122.511.dist-info}/top_level.txt
RENAMED
File without changes