mct-nightly 2.0.0.20240410.422__py3-none-any.whl → 2.0.0.20240412.408__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/METADATA +2 -2
- {mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/RECORD +36 -27
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +4 -0
- model_compression_toolkit/core/common/graph/base_graph.py +3 -2
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +2 -2
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +6 -1
- model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +5 -0
- model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py +5 -0
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +29 -11
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +1 -1
- model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +2 -2
- model_compression_toolkit/gptq/keras/gptq_training.py +17 -15
- model_compression_toolkit/gptq/keras/quantization_facade.py +6 -1
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +2 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +18 -16
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +6 -1
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py +2 -1
- model_compression_toolkit/metadata.py +29 -0
- model_compression_toolkit/ptq/keras/quantization_facade.py +6 -2
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +6 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +4 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +1 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +12 -2
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +210 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +129 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py +111 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/__init__.py +16 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +207 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +129 -0
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py +110 -0
- {mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/WHEEL +0 -0
- {mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/top_level.txt +0 -0
{mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: mct-nightly
-Version: 2.0.0.
+Version: 2.0.0.20240412.408
Summary: A Model Compression Toolkit for neural networks
Home-page: UNKNOWN
License: UNKNOWN
@@ -23,7 +23,7 @@ Requires-Dist: PuLP
Requires-Dist: matplotlib
Requires-Dist: scipy
Requires-Dist: protobuf
-Requires-Dist: mct-quantizers ==1.
+Requires-Dist: mct-quantizers ==1.5

# Model Compression Toolkit (MCT)

{mct_nightly-2.0.0.20240410.422.dist-info → mct_nightly-2.0.0.20240412.408.dist-info}/RECORD
RENAMED
@@ -1,7 +1,8 @@
-model_compression_toolkit/__init__.py,sha256=
-model_compression_toolkit/constants.py,sha256=
+model_compression_toolkit/__init__.py,sha256=EgLjEka0UxNeko2XoTELtIHjauH-j9CIyzQ86cFqdHg,1573
+model_compression_toolkit/constants.py,sha256=f9at1H_-vb5nvdHRmAHUco4ja4_QermK6yu0N9qbRGE,3723
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
+model_compression_toolkit/metadata.py,sha256=IyoON37lBv3TI0rZGCP4K5t3oYI4TOmYy-LRXOwHGpE,1136
model_compression_toolkit/core/__init__.py,sha256=TrRgkWpT1AN2Faw1M_1HXyJkJnbxfn9p-RigDZl7pg0,1982
model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
model_compression_toolkit/core/graph_prep_runner.py,sha256=Ftqm59hT5TGWmSNkY9bFZkVfCacpGyZfCe-6yZR5WY0,10100
@@ -29,7 +30,7 @@ model_compression_toolkit/core/common/collectors/statistics_collector.py,sha256=
model_compression_toolkit/core/common/fusion/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
model_compression_toolkit/core/common/fusion/layer_fusing.py,sha256=lOubqpc18TslhXZijWUJQAa1c3jIB2S-M-5HK78wJPQ,5548
model_compression_toolkit/core/common/graph/__init__.py,sha256=Xr-Lt_qXMdrCnnOaUS_OJP_3iTTGfPCLf8_vSrQgCs0,773
-model_compression_toolkit/core/common/graph/base_graph.py,sha256=
+model_compression_toolkit/core/common/graph/base_graph.py,sha256=06mvCb_HHA5iIOdQ31a-nimhrpSA-jYnuV1Ir76QGa8,38259
model_compression_toolkit/core/common/graph/base_node.py,sha256=jPYpf6sci8LswatxTyygD8ZM5OvsCnxBEWsSl-g64wI,28492
model_compression_toolkit/core/common/graph/edge.py,sha256=buoSEUZwilWBK3WeBKpJ-GeDaUA1SDdOHxDpxU_bGpk,3784
model_compression_toolkit/core/common/graph/functional_node.py,sha256=RgwWAoMX7YV5c2gZdTBSX-ziTh3OLbebZXr3jitkxDs,3173
@@ -113,7 +114,7 @@ model_compression_toolkit/core/common/quantization/quantization_params_generatio
model_compression_toolkit/core/common/quantization/quantization_params_generation/outlier_filter.py,sha256=9gnfJV89jpGwAx8ImJ5E9NjCv3lDtbyulP4OtgWb62M,1772
model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py,sha256=BiwDqt5CeU6CW0Qusy3LwWhFtf2J9BvSuGMsTsG6rSw,8538
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py,sha256=noEdvGiyyW7acgQ2OFWLedCODibTGYJifC9qo8YIU5U,4558
-model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py,sha256=
+model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py,sha256=H2D9rdChIviL_j0mF6zy8Qeu_ZXKRu-hLqckSAT1MR8,4352
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py,sha256=7kt0JB8PQE0SW9kg8fCwZ5mBkHNgiRrn0of4ZQYQN2A,41524
model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py,sha256=nug6XgsywxYf57XF_Tnt2xwdf0zLLsajiZKEblo4lFc,3882
model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py,sha256=QtSAtdAb7sTgtoe9L6DnMFO7rjkOtpzE9kD9xmG7eYM,9743
@@ -303,29 +304,29 @@ model_compression_toolkit/exporter/model_exporter/fw_agonstic/quantization_forma
model_compression_toolkit/exporter/model_exporter/keras/__init__.py,sha256=uZ2RigbY9O2PJ0Il8wPpS_s7frgg9WUGd_SHeKGyl1A,699
model_compression_toolkit/exporter/model_exporter/keras/base_keras_exporter.py,sha256=-wr2n0yRlmFixXBeZuxg6Rzlvz-ZFUX-PJgSXhgMrEo,1593
model_compression_toolkit/exporter/model_exporter/keras/export_serialization_format.py,sha256=v_-rOsWDFI-3k8CoJIr-XzT7ny8WXpAMteWRWtTzaeg,963
-model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py,sha256=
-model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py,sha256=
-model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py,sha256=
+model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py,sha256=n_iXPwMomMVJTZH9M1WV7OJo11ppXOWkANu41fIlsjY,11702
+model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py,sha256=XoFGkVBikKh1BuxObrMLjfVLDIgy3X0rhmEl08CdJls,3727
+model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py,sha256=iTUXaia8XLJmmWdk4iiCah9sxeIyBJy42s9_EpuPhnw,8261
model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py,sha256=Q2hVl0dpH7hcVSVD9Y5BihtEGlHWrLk-_Y2RNPbfQTg,5750
model_compression_toolkit/exporter/model_exporter/keras/mctq_keras_exporter.py,sha256=qXXkv3X_wb7t622EOHwXIxfGLGaDqh0T0y4UxREi4Bo,1976
model_compression_toolkit/exporter/model_exporter/pytorch/__init__.py,sha256=uZ2RigbY9O2PJ0Il8wPpS_s7frgg9WUGd_SHeKGyl1A,699
model_compression_toolkit/exporter/model_exporter/pytorch/base_pytorch_exporter.py,sha256=UPVkEUQCMZ4Lld6CRnEOPEmlfe5vcQZG0Q3FwRBodD4,4021
model_compression_toolkit/exporter/model_exporter/pytorch/export_serialization_format.py,sha256=bPevy6OBqng41PqytBR55e6cBEuyrUS0H8dWX4zgjQ4,967
-model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=
+model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py,sha256=r2pOWFK-mSG8OzRiKGVOG4skzX0ZiM0eiRuBsL-ThoI,6067
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py,sha256=ksWV2A-Njo-wAxQ_Ye2sLIZXBWJ_WNyjT7-qFFwvV2o,2897
-model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py,sha256=
+model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py,sha256=yz5dPMX5r1d9LJV4rYFS1pXqCbVUxvUmV4LELWcRinQ,6350
model_compression_toolkit/exporter/model_wrapper/__init__.py,sha256=7CF2zvpTrIEm8qnbuHnLZyTZkwBBxV24V8QA0oxGbh0,1187
model_compression_toolkit/exporter/model_wrapper/fw_agnostic/__init__.py,sha256=pKAdbTCFM_2BrZXUtTIw0ouKotrWwUDF_hP3rPwCM2k,696
model_compression_toolkit/exporter/model_wrapper/fw_agnostic/get_inferable_quantizers.py,sha256=Bd3QhAR__YC9Xmobd5qHv9ofh_rPn_eTFV0sXizcBnY,2297
model_compression_toolkit/exporter/model_wrapper/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py,sha256=YffgbVYJG5LKeIsW84Pi7NqzQcvJMeQRnAKQCCmIL6c,3776
model_compression_toolkit/exporter/model_wrapper/keras/builder/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py,sha256=
+model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py,sha256=k3UrGAw6vKTmZ-oO1lv0VqK3IpAiet9jlIHyEIoL2u0,5132
model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py,sha256=uL6tJWC4s2IWUy8GJVwtMWpwZZioRRztfKyPJHo14xI,9442
model_compression_toolkit/exporter/model_wrapper/pytorch/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py,sha256=uTQcnzvP44CgPO0twsUdiMmTBE_Td6ZdQtz5U0GZuPI,3464
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py,sha256=
+model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py,sha256=D_mEUK1sb4kY5946oErfw3RC5mfBTVaw3LZRIKWYKcE,4918
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py,sha256=4sN5z-6BXrTE5Dp2FX_jKO9ty5iZ2r4RM7XvXtDVLSI,9348
model_compression_toolkit/gptq/__init__.py,sha256=YKg-tMj9D4Yd0xW9VRD5EN1J5JrmlRbNEF2fOSgodqA,1228
model_compression_toolkit/gptq/runner.py,sha256=MIg-oBtR1nbHkexySdCJD_XfjRoHSknLotmGBMuD5qM,5924
@@ -338,14 +339,14 @@ model_compression_toolkit/gptq/common/gptq_training.py,sha256=rLA1xlOO-6gWfmc2dL
model_compression_toolkit/gptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCSjq5xk-xGymOwSOqjp39It-CVtGcCTRTf0E_4,1248
model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=rbRkF15MYd6nq4G49kcjb_dPTa-XNq9cTkrb93mXawo,6241
-model_compression_toolkit/gptq/keras/gptq_training.py,sha256=
+model_compression_toolkit/gptq/keras/gptq_training.py,sha256=zyVcEQzdnNsrIz32U1pqqoi08hzxRdJ2CumaPFGwbDM,19123
model_compression_toolkit/gptq/keras/graph_info.py,sha256=5IvgGlJlgOmQYmldjdCBv7tuzAoY0HazatG5Pedrg0Q,4639
-model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=
+model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=CCV9uyaq-qUGDeXL5OgEWFXSiUkerXrNwFVyA1brrKM,14663
model_compression_toolkit/gptq/keras/quantizer/__init__.py,sha256=-DK1CDXvlsnEbki4lukZLpl6Xrbo91_jcqxXlG5Eg6Q,963
model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py,sha256=2YU-x4-Q5f6hkUJf0tw6vcwdNwRMHdefrFjhhyHYsvA,4782
model_compression_toolkit/gptq/keras/quantizer/quant_utils.py,sha256=Vt7Qb8i4JsE4sFtcjpfM4FTXTtfV1t6SwfoNH8a_Iaw,5055
model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py,sha256=FmK5cPwgLAzrDjHTWf_vbRO5s70S7iwpnjnlqEQTuGE,4408
-model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py,sha256=
+model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py,sha256=guf7ygnLsZeWnTDz4yJdE2iTkd1oE0uQAZwKnGV3OAk,1957
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=qUuMKysUpjWYjNbchFuyb_UFwzV1HL7R3Y7o0Z5rf60,4016
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=BBSDWLmeywjSM5N6oJkMgcuo7zrXTesB4zLwRGG8QB0,12159
@@ -355,14 +356,14 @@ model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py,sha
model_compression_toolkit/gptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=kDuWw-6zh17wZpYWh4Xa94rpoodf82DksgjQCnL7nBc,2719
model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
-model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=
+model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=xkDa62AdIRwv8dEshffALW9Ri66eseEpyUF9taMUKns,16509
model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=yXJzDd24zfGs2_vfMovxD1WSh1RxXoPxN4GztOf3P5c,3967
-model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256
+model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=iBLEbLgde6JQNPhJysfT2rl_Sc7-wyoIZnXRAXQWnR0,13065
model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=TCA1hAc7raPnrjl06sjFtVM4XUtLtuwAhCGX4U3KGZo,4137
model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=uT9N_aBj965hvQfKd67fS1B0SXGnOLVcqa3wW4b2iZE,4566
-model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256
+model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256=mDWZERLwtDzqWeJUwHMVyGdlS8wPLjJ3NvZiKBP6BNA,1959
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=oO7WgsAHMnWoXNm_gTKAAe-Nd79mGL_m677ai-ui424,4132
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=kLVQC1hXzDpP4Jx7AwnA764oGnY5AMEuvUUhAvhz09M,12347
@@ -377,9 +378,9 @@ model_compression_toolkit/pruning/pytorch/pruning_facade.py,sha256=cSuvHHCqgr7k9
model_compression_toolkit/ptq/__init__.py,sha256=Z_hkmTh7aLFei1DJKV0oNVUbrv_Q_0CTw-qD85Xf8UM,904
model_compression_toolkit/ptq/runner.py,sha256=_c1dSjlPPpsx59Vbg1buhG9bZq__OORz1VlPkwjJzoc,2552
model_compression_toolkit/ptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/ptq/keras/quantization_facade.py,sha256=
+model_compression_toolkit/ptq/keras/quantization_facade.py,sha256=s6vBCK98l-R12yWASkutPSmNSfPX7457DazroJwhjpo,10517
model_compression_toolkit/ptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
-model_compression_toolkit/ptq/pytorch/quantization_facade.py,sha256=
+model_compression_toolkit/ptq/pytorch/quantization_facade.py,sha256=kkdgBXRBkblBTOW5EaySI_bN4_becSUwbdgOTb7FW2c,9012
model_compression_toolkit/qat/__init__.py,sha256=kj2qsZh_Ca7PncsHKcaL5EVT2H8g4hYtvaQ3KFxOkwE,1143
model_compression_toolkit/qat/common/__init__.py,sha256=6tLZ4R4pYP6QVztLVQC_jik2nES3l4uhML0qUxZrezk,829
model_compression_toolkit/qat/common/qat_config.py,sha256=zoq0Vb74vCY7WlWD8JH_KPrHDoUHSvMc3gcO53u7L2U,3394
@@ -415,19 +416,19 @@ model_compression_toolkit/target_platform_capabilities/target_platform/current_t
model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py,sha256=f3xBAI6ivPvEj4lw8cAvTKdIbs7CRdLAa_0LvhGw3Dg,3924
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py,sha256=xMBarITs2g_pCakep1vlNpM9Au5kQubTtFQAFJhvtYs,14248
model_compression_toolkit/target_platform_capabilities/target_platform/operators.py,sha256=rRmrmPBY4rxCWVpEc6FxeOPUFh8MkfwgQsqD82U9a7w,3108
-model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py,sha256=
+model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py,sha256=55EQyP4McJBwsw9_l9eseEZ-V2ygQfdRXjjJLF6HdIM,9482
model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model_component.py,sha256=TDbNQwmF7Id-FoIQZlR7ZOcz_nRb4XKBmDihAgKT0u8,1392
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/__init__.py,sha256=WCP1wfFZgM4eFm-pPeUinr5R_aSx5qwfSQqLZCXUNBA,1513
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py,sha256=jfhszvuD2Fyy6W2KjlLzXBQKFzTqGAaDZeFVr4-ONQw,8776
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py,sha256=fIheShGOnxWYKqT8saHpBJqOU5RG_1Hp9qHry7IviIw,2115
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/layer_filter_params.py,sha256=Cl6-mACpje2jM8RJkibbqE3hvTkFR3r26-lW021mIiA,4019
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py,sha256=1JN3yvNiJyDfva0tLTH3ej_qORzrQcPz32bSMKl49_0,6720
-model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py,sha256=
+model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py,sha256=KP8IWlHzkXzVjqIiRtAW6sTYyHJ2wVFFX4hMt_N6o3s,9910
model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities_component.py,sha256=FvrYI0Qy7DCmDp2gyUYyCZq5pY84JgLtJqSIiVTJ8Ss,1030
model_compression_toolkit/target_platform_capabilities/tpc_models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py,sha256=aHoAu5Iye9YVn2HLwNb4X9cUDX1WJt20R5GsNGIAk9E,3337
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
-model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py,sha256=
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py,sha256=fPOzybGECCWPkAD1hmJryWZrf9vd5Od-UOH6PE0lH94,3820
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py,sha256=F5RG4MnuAwKcNXbfVbPFLQu30-lNax-7knqu20B6udQ,1522
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/__init__.py,sha256=1mMOREEMoNHu_KTMGDp4crN61opKWX6aFn1DrDLvqcc,717
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py,sha256=S-GwMI-JiuPpbtOdd6TSOEjiUFiIs6M2RAiJNJ3O950,10883
@@ -441,6 +442,14 @@ model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py,sha256=o1KloA8WPw1MbtZ-4p-kxQuroBAL67z77dPpliZyH9o,10369
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py,sha256=NkAGCZbSgXYeRAiJRzt19h2cxkrVQJaHu8-2jHZLOYg,6505
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py,sha256=X853xDEF-3rcPoqxbrlYN28vvW3buSdM36c_eN_LKx8,5758
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/__init__.py,sha256=vKWAoQ2KkhuptS5HZB50zHG6KY8wHpHTxPugw_nGCRo,717
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py,sha256=8FZjOCaQRwrQLbtmzNrrRj2-VyZMUGzsIWKIDpGVEoQ,10947
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py,sha256=waefIjxpRfjSnJhKfgpR2DZM6B9NLzU-harUerk6oBc,6485
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py,sha256=m6gOYBJZZd31RZHaiovDPLMlpt0HRiJhcKmk73_8380,5732
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/__init__.py,sha256=wUk4Xsg7jpxOWYjq2K3WUwLcI185p_sVPK-ttG0ydhA,721
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py,sha256=T6Hp_Rk15SAz22g_SWDyHJecBpBAjxKt3ezuVEYf4LE,10680
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py,sha256=O7h77xAC92nlpgl6cwuJvL3DKDVBplMbKZEQXXg6p0w,6493
+model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py,sha256=uvHIwmZ1DtA9-XY7SV-b4pSWZde2Ya9MNvGRlVVvdY0,5739
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/target_platform_capabilities.py,sha256=lnhJcwvTF0t7ybeiTleIS1p0aD8xzFZxVPx4ISk5uWQ,2090
model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py,sha256=UUvUCcTots_sehdRnDfgkaE8WPQ7dPbeuhDF4Qy2nzw,1510
@@ -471,8 +480,8 @@ model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha
model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=7bbzqJN8ZAycVDvZr_5xC-niTAR5df8f03Kooev_pfg,3047
-mct_nightly-2.0.0.
-mct_nightly-2.0.0.
-mct_nightly-2.0.0.
-mct_nightly-2.0.0.
-mct_nightly-2.0.0.
+mct_nightly-2.0.0.20240412.408.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+mct_nightly-2.0.0.20240412.408.dist-info/METADATA,sha256=7hBFTvvDA-m2U-nbakxLvO7VES7x2VTln6ZpE6JGn68,18795
+mct_nightly-2.0.0.20240412.408.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+mct_nightly-2.0.0.20240412.408.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.0.0.20240412.408.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
from model_compression_toolkit import pruning
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model

-__version__ = "2.0.0.
+__version__ = "2.0.0.20240412.000408"
model_compression_toolkit/constants.py
CHANGED

@@ -24,6 +24,10 @@ FOUND_ONNX = importlib.util.find_spec("onnx") is not None
FOUND_ONNXRUNTIME = importlib.util.find_spec("onnxruntime") is not None
FOUND_SONY_CUSTOM_LAYERS = importlib.util.find_spec('sony_custom_layers') is not None

+# Metadata fields
+MCT_VERSION = 'mct_version'
+TPC_VERSION = 'tpc_version'
+
WEIGHTS_SIGNED = True
# Minimal threshold to use for quantization ranges:
MIN_THRESHOLD = (2 ** -16)
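The new model_compression_toolkit/metadata.py (listed above with +29 lines, not shown in this diff) exposes get_versions_dict, which the GPTQ facades later in this diff pass to mct_quantizers' add_metadata together with the TargetPlatformCapabilities in use. Its implementation is not included here; the sketch below only illustrates, as an assumption, how a metadata dict keyed by the two new constants could be assembled (build_versions_dict and the example version strings are hypothetical, not part of the package):

    from model_compression_toolkit.constants import MCT_VERSION, TPC_VERSION

    def build_versions_dict(mct_version: str, tpc_version: str) -> dict:
        # Hypothetical stand-in for metadata.get_versions_dict: a plain dict keyed
        # by the new constant field names, later embedded into the exported model.
        return {MCT_VERSION: mct_version, TPC_VERSION: tpc_version}

    # e.g. build_versions_dict("2.0.0.20240412.000408", "imx500_tpc.v2")
    #      -> {'mct_version': '2.0.0.20240412.000408', 'tpc_version': 'imx500_tpc.v2'}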
model_compression_toolkit/core/common/graph/base_graph.py
CHANGED

@@ -103,9 +103,10 @@ class Graph(nx.MultiDiGraph, GraphSearches):
            if n.is_custom:
                if not is_node_in_tpc:
                    Logger.critical(f'MCT does not support optimizing Keras custom layers. Found a layer of type {n.type}. '
-
+                                    ' Please add the custom layer to Target Platform Capabilities (TPC), or file a feature '
+                                    'request or an issue if you believe this should be supported.') # pragma: no cover
                if any([qc.default_weight_attr_config.enable_weights_quantization for qc in n.get_qco(tpc).quantization_config_list]):
-                    Logger.critical(f'Layer identified: {n.type}. MCT does not support weight quantization for Keras custom layers.')
+                    Logger.critical(f'Layer identified: {n.type}. MCT does not support weight quantization for Keras custom layers.') # pragma: no cover

        self.tpc = tpc

model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py
CHANGED

@@ -42,14 +42,14 @@ def calculate_quantization_params(graph: Graph,

    """

-   Logger.info(f"
+   Logger.info(f"\nRunning quantization parameters search. "
                f"This process might take some time, "
                f"depending on the model size and the selected quantization methods.\n")

    # Create a list of nodes to compute their thresholds
    nodes_list: List[BaseNode] = nodes if specific_nodes else graph.nodes()

-   for n in tqdm(nodes_list, "Calculating quantization
+   for n in tqdm(nodes_list, "Calculating quantization parameters"): # iterate only nodes that we should compute their thresholds
        for candidate_qc in n.candidates_quantization_cfg:
            for attr in n.get_node_weights_attributes():
                if n.is_weights_quantization_enabled(attr):
model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py
CHANGED

@@ -100,7 +100,7 @@ class FakelyQuantKerasExporter(BaseKerasExporter):
                weights_list.append(layer.get_quantized_weights()['kernel'])
            else:
                Logger.critical(f'KerasQuantizationWrapper should wrap only DepthwiseConv2D, Conv2D, Dense'
-
+                                f' and Conv2DTranspose layers but wrapped layer is {layer.layer}')

            if layer.layer.bias is not None:
                weights_list.append(layer.layer.bias)
@@ -121,6 +121,11 @@ class FakelyQuantKerasExporter(BaseKerasExporter):

            return layer

+       # Delete metadata layer if exists
+       if hasattr(self.model, 'metadata_layer'):
+           Logger.info('Metadata is not exported to FakelyQuant models.')
+           delattr(self.model, 'metadata_layer')
+
        # clone each layer in the model and apply _unwrap_quantize_wrapper to layers wrapped with a QuantizeWrapper.
        self.exported_model = tf.keras.models.clone_model(self.model,
                                                          input_tensors=None,
model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py
CHANGED

@@ -56,6 +56,11 @@ class FakelyQuantTFLiteExporter(FakelyQuantKerasExporter):
        (namely, weights that are in fake-quant format) and fake-quant layers for the activations.

        """
+       # Delete metadata layer if exists
+       if hasattr(self.model, 'metadata_layer'):
+           Logger.info('Metadata is not exported to TFLite models.')
+           delattr(self.model, 'metadata_layer')
+
        # Use Keras exporter to quantize model's weights before converting it to TFLite.
        # Since exporter saves the model, we use a tmp path for saving, and then we delete it.
        handle, tmp_file = tempfile.mkstemp(DEFAULT_KERAS_EXPORT_EXTENTION)
model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py
CHANGED

@@ -166,6 +166,11 @@ class INT8TFLiteExporter(FakelyQuantKerasExporter):

            return layer_to_substitue

+       # Delete metadata layer if exists
+       if hasattr(self.model, 'metadata_layer'):
+           Logger.info('Metadata is not exported to TFLite models.')
+           delattr(self.model, 'metadata_layer')
+
        # Transform the model to a new model that can be converted to int8 models.
        # For example: replace dense layers with point-wise layers (to support per-channel quantization)
        self.transformed_model = clone_model(self.model,
model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py
CHANGED
@@ -13,17 +13,21 @@
# limitations under the License.
# ==============================================================================
from typing import Callable
+from io import BytesIO

import torch.nn
+import onnx

from mct_quantizers import PytorchActivationQuantizationHolder, PytorchQuantizationWrapper
from model_compression_toolkit.logger import Logger
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
from model_compression_toolkit.exporter.model_exporter.pytorch.base_pytorch_exporter import BasePyTorchExporter
from mct_quantizers import pytorch_quantizers
+from mct_quantizers.pytorch.metadata import add_onnx_metadata

DEFAULT_ONNX_OPSET_VERSION=15

+
class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
    """
    Exporter for fakely-quant PyTorch models.
@@ -58,7 +62,6 @@ class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
        self._use_onnx_custom_quantizer_ops = use_onnx_custom_quantizer_ops
        self._onnx_opset_version = onnx_opset_version

-
    def export(self) -> None:
        """
        Convert an exportable (fully-quantized) PyTorch model to a fakely-quant model
@@ -74,7 +77,7 @@ class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):
        # If _use_onnx_custom_quantizer_ops is set to True, the quantizer forward function will use
        # the custom implementation when exporting the operator into onnx model. If not, it removes the
        # wraps and quantizes the ops in place (for weights, for activation torch quantization function is
-       # exported since it's used during forward.
+       # exported since it's used during forward).
        if self._use_onnx_custom_quantizer_ops:
            self._enable_onnx_custom_ops_export()
        else:
@@ -87,15 +90,30 @@ class FakelyQuantONNXPyTorchExporter(BasePyTorchExporter):

        model_input = to_torch_tensor(next(self.repr_dataset())[0])

- (removed lines not shown)
+       if hasattr(self.model, 'metadata'):
+           onnx_bytes = BytesIO()
+           torch.onnx.export(self.model,
+                             model_input,
+                             onnx_bytes,
+                             opset_version=self._onnx_opset_version,
+                             verbose=False,
+                             input_names=['input'],
+                             output_names=['output'],
+                             dynamic_axes={'input': {0: 'batch_size'},
+                                           'output': {0: 'batch_size'}})
+           onnx_model = onnx.load_from_string(onnx_bytes.getvalue())
+           onnx_model = add_onnx_metadata(onnx_model, self.model.metadata)
+           onnx.save_model(onnx_model, self.save_model_path)
+       else:
+           torch.onnx.export(self.model,
+                             model_input,
+                             self.save_model_path,
+                             opset_version=self._onnx_opset_version,
+                             verbose=False,
+                             input_names=['input'],
+                             output_names=['output'],
+                             dynamic_axes={'input': {0: 'batch_size'},
+                                           'output': {0: 'batch_size'}})

    def _enable_onnx_custom_ops_export(self):
        """
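Stripped of the class plumbing, the new metadata-aware ONNX path is simply "serialize to memory, attach the metadata dict, write to disk". A minimal standalone sketch of that pattern (model, sample, meta and save_path are placeholders for self.model, model_input, self.model.metadata and self.save_model_path above):

    from io import BytesIO

    import onnx
    import torch
    from mct_quantizers.pytorch.metadata import add_onnx_metadata

    def export_onnx_with_metadata(model, sample, meta, save_path, opset=15):
        buffer = BytesIO()
        # Export to an in-memory buffer first, so the metadata can be attached
        # to the ONNX proto before anything is written to disk.
        torch.onnx.export(model, sample, buffer,
                          opset_version=opset, verbose=False,
                          input_names=['input'], output_names=['output'],
                          dynamic_axes={'input': {0: 'batch_size'},
                                        'output': {0: 'batch_size'}})
        onnx_model = onnx.load_from_string(buffer.getvalue())
        onnx_model = add_onnx_metadata(onnx_model, meta)  # embed the versions dict
        onnx.save_model(onnx_model, save_path)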
model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py
CHANGED

@@ -40,7 +40,7 @@ if FOUND_TORCH:
                             repr_dataset: Callable,
                             is_layer_exportable_fn: Callable = is_pytorch_layer_exportable,
                             serialization_format: PytorchExportSerializationFormat = PytorchExportSerializationFormat.ONNX,
-                            quantization_format
+                            quantization_format: QuantizationFormat = QuantizationFormat.MCTQ,
                             onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION) -> None:
        """
        Export a PyTorch quantized model to a torchscript or onnx model.
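For callers this means the facade's quantization_format argument is now explicitly typed and defaults to the MCTQ format. A hypothetical call sketch (only repr_dataset, serialization_format, quantization_format and onnx_opset_version appear in the hunk above; the remaining argument names and the mct.exporter entry point are assumptions based on MCT's exporter API, not part of this diff):

    import model_compression_toolkit as mct

    mct.exporter.pytorch_export_model(
        model=exportable_model,                 # an exportable model returned by an MCT facade
        save_model_path='./qmodel.onnx',
        repr_dataset=representative_data_gen,   # same generator used during quantization
        serialization_format=mct.exporter.PytorchExportSerializationFormat.ONNX,
        quantization_format=mct.exporter.QuantizationFormat.MCTQ)  # the new annotated default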
model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py
CHANGED
@@ -90,7 +90,7 @@ if FOUND_TF:
            fw_impl=C.keras.keras_implementation.KerasImplementation())).build_model()
        exportable_model.trainable = False

-       Logger.info("
+       Logger.info("\nPlease run your accuracy evaluation on the exported quantized model to verify it's accuracy.\n"
                    "Checkout the FAQ and Troubleshooting pages for resolving common issues and improving the quantized model accuracy:\n"
                    "FAQ: https://github.com/sony/model_optimization/tree/main/FAQ.md\n"
                    "Quantization Troubleshooting: https://github.com/sony/model_optimization/tree/main/quantization_troubleshooting.md")
model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py
CHANGED
@@ -82,7 +82,7 @@ if FOUND_TORCH:
            get_activation_quantizer_holder(n,
                                            fw_impl=C.pytorch.pytorch_implementation.PytorchImplementation())).build_model()

-       Logger.info("
+       Logger.info("\nPlease run your accuracy evaluation on the exported quantized model to verify it's accuracy.\n"
                    "Checkout the FAQ and Troubleshooting pages for resolving common issues and improving the quantized model accuracy:\n"
                    "FAQ: https://github.com/sony/model_optimization/tree/main/FAQ.md\n"
                    "Quantization Troubleshooting: https://github.com/sony/model_optimization/tree/main/quantization_troubleshooting.md")
@@ -93,4 +93,4 @@ if FOUND_TORCH:
else:
    def get_exportable_pytorch_model(*args, **kwargs):
        Logger.critical("PyTorch must be installed to use 'get_exportable_pytorch_model'. "
-                       "The 'torch' package is missing.") # pragma: no cover
+                       "The 'torch' package is missing.") # pragma: no cover
model_compression_toolkit/gptq/keras/gptq_training.py
CHANGED

@@ -301,21 +301,23 @@ class KerasGPTQTrainer(GPTQTrainer):
        Returns: None

        """
- (removed lines not shown)
+       with tqdm(range(n_epochs), "Running GPTQ optimization") as epochs_pbar:
+           for _ in epochs_pbar:
+               with tqdm(data_function(), position=1, leave=False) as data_pbar:
+                   for data in data_pbar:
+                       input_data = [d * self.input_scale for d in data]
+
+                       loss_value_step, grads = self.nano_training_step(input_data, in_compute_gradients,
+                                                                        in_optimizer_with_param, is_training)
+                       # Run one step of gradient descent by updating
+                       # the value of the variables to minimize the loss.
+                       for i, (o, p) in enumerate(in_optimizer_with_param):
+                           o.apply_gradients(zip(grads[i], p))
+                       if self.gptq_config.log_function is not None:
+                           self.gptq_config.log_function(loss_value_step, grads[0], in_optimizer_with_param[0][-1],
+                                                         self.compare_points)
+                       self.loss_list.append(loss_value_step.numpy())
+                       Logger.debug(f'last loss value: {self.loss_list[-1]}')

    def update_graph(self):
        """
model_compression_toolkit/gptq/keras/quantization_facade.py
CHANGED

@@ -31,6 +31,7 @@ from model_compression_toolkit.core.runner import core_runner
from model_compression_toolkit.gptq.runner import gptq_runner
from model_compression_toolkit.core.analyzer import analyzer_model_quantization
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
+from model_compression_toolkit.metadata import get_versions_dict

LR_DEFAULT = 0.15
LR_REST_DEFAULT = 1e-4
@@ -48,6 +49,7 @@ if FOUND_TF:
    from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
    from model_compression_toolkit.exporter.model_wrapper import get_exportable_keras_model
    from model_compression_toolkit import get_target_platform_capabilities
+   from mct_quantizers.keras.metadata import add_metadata

    # As from TF2.9 optimizers package is changed
    if version.parse(tf.__version__) < version.parse("2.9"):
@@ -234,7 +236,10 @@ if FOUND_TF:
                                             fw_impl,
                                             DEFAULT_KERAS_INFO)

-
+       exportable_model, user_info = get_exportable_keras_model(tg_gptq)
+       if target_platform_capabilities.tp_model.add_metadata:
+           exportable_model = add_metadata(exportable_model, get_versions_dict(target_platform_capabilities))
+       return exportable_model, user_info

else:
    # If tensorflow is not installed,
model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py
CHANGED

@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
+from tqdm import tqdm
from typing import Callable

from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig, GradientPTQConfig
@@ -35,7 +36,7 @@ def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen:
    if gptq_config.rounding_type == RoundingType.SoftQuantizer:
        # dry run on the representative dataset to count number of batches
        num_batches = 0
-       for _ in representative_data_gen():
+       for _ in tqdm(representative_data_gen(), "GPTQ initialization"):
            num_batches += 1

        return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
model_compression_toolkit/gptq/pytorch/gptq_training.py
CHANGED

@@ -248,22 +248,24 @@ class PytorchGPTQTrainer(GPTQTrainer):
            data_function: A callable function that give a batch of samples.
            n_epochs: Number of update iterations of representative dataset.
        """
- (removed lines not shown)
+       with tqdm(range(n_epochs), "Running GPTQ optimization") as epochs_pbar:
+           for _ in epochs_pbar:
+               with tqdm(data_function(), position=1, leave=False) as data_pbar:
+                   for data in data_pbar:
+                       input_data = [d * self.input_scale for d in data]
+                       input_tensor = to_torch_tensor(input_data)
+                       y_float = self.float_model(input_tensor) # running float model
+                       loss_value, grads = self.compute_gradients(y_float, input_tensor)
+                       # Run one step of gradient descent by updating the value of the variables to minimize the loss.
+                       for (optimizer, _) in self.optimizer_with_param:
+                           optimizer.step()
+                           optimizer.zero_grad()
+                       if self.gptq_config.log_function is not None:
+                           self.gptq_config.log_function(loss_value.item(),
+                                                         torch_tensor_to_numpy(grads),
+                                                         torch_tensor_to_numpy(self.optimizer_with_param[0][-1]))
+                       self.loss_list.append(loss_value.item())
+                       Logger.debug(f'last loss value: {self.loss_list[-1]}')

    def update_graph(self) -> Graph:
        """
model_compression_toolkit/gptq/pytorch/quantization_facade.py
CHANGED

@@ -31,6 +31,7 @@ from model_compression_toolkit.core.analyzer import analyzer_model_quantization
from model_compression_toolkit.core import CoreConfig
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
    MixedPrecisionQuantizationConfig
+from model_compression_toolkit.metadata import get_versions_dict

LR_DEFAULT = 1e-4
LR_REST_DEFAULT = 1e-4
@@ -47,6 +48,7 @@ if FOUND_TORCH:
    from torch.nn import Module
    from torch.optim import Adam, Optimizer
    from model_compression_toolkit import get_target_platform_capabilities
+   from mct_quantizers.pytorch.metadata import add_metadata
    DEFAULT_PYTORCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)

    def get_pytorch_gptq_config(n_epochs: int,
@@ -202,7 +204,10 @@ if FOUND_TORCH:
                                               fw_impl,
                                               DEFAULT_PYTORCH_INFO)

-
+       exportable_model, user_info = get_exportable_pytorch_model(graph_gptq)
+       if target_platform_capabilities.tp_model.add_metadata:
+           exportable_model = add_metadata(exportable_model, get_versions_dict(target_platform_capabilities))
+       return exportable_model, user_info


else:
model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py
CHANGED

@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
+from tqdm import tqdm
from typing import Callable

from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig, GradientPTQConfig
@@ -35,7 +36,7 @@ def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen:
    if gptq_config.rounding_type == RoundingType.SoftQuantizer:
        # dry run on the representative dataset to count number of batches
        num_batches = 0
-       for _ in representative_data_gen():
+       for _ in tqdm(representative_data_gen(), "GPTQ initialization"):
            num_batches += 1

        return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
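In both backends the dry run only counts the representative batches, so the soft-quantizer regularization schedule length is num_batches * n_epochs; with illustrative numbers (not taken from this diff), a generator yielding 32 batches with n_epochs=5 gives SoftQuantizerRegularization(total_gradient_steps=160). The behavioural change in both files is that this counting pass is now wrapped in a tqdm bar labeled "GPTQ initialization", so it is visible instead of appearing as a silent pause before GPTQ starts.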