mct-nightly 2.2.0.20240902.511__py3-none-any.whl → 2.2.0.20240904.449__py3-none-any.whl
This diff represents the changes between two publicly released versions of the package, as published to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/METADATA +6 -6
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/RECORD +35 -26
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +1 -2
- model_compression_toolkit/qat/__init__.py +2 -2
- model_compression_toolkit/qat/common/qat_config.py +1 -19
- model_compression_toolkit/qat/keras/quantization_facade.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +1 -1
- model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +1 -1
- model_compression_toolkit/qat/pytorch/quantizer/{base_pytorch_qat_quantizer.py → base_pytorch_qat_weight_quantizer.py} +4 -13
- model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +6 -116
- model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +12 -122
- model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +8 -7
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +6 -84
- model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +6 -85
- model_compression_toolkit/trainable_infrastructure/__init__.py +9 -3
- model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +9 -8
- model_compression_toolkit/trainable_infrastructure/common/training_method.py +31 -0
- model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +2 -2
- model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +2 -2
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py +19 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py +22 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/symmetric_lsq.py +111 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/uniform_lsq.py +106 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/__init__.py +14 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/symmetric_ste.py +108 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/uniform_ste.py +105 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +7 -14
- model_compression_toolkit/{qat/pytorch/quantizer → trainable_infrastructure/pytorch}/quantizer_utils.py +79 -2
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/top_level.txt +0 -0
{mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: mct-nightly
|
3
|
-
Version: 2.2.0.20240902.511
|
3
|
+
Version: 2.2.0.20240904.449
|
4
4
|
Summary: A Model Compression Toolkit for neural networks
|
5
5
|
Home-page: UNKNOWN
|
6
6
|
License: UNKNOWN
|
@@ -78,11 +78,11 @@ for hands-on learning. For example:
|
|
78
78
|
Currently, MCT is being tested on various Python, Pytorch and TensorFlow versions:
|
79
79
|
|
80
80
|
|
81
|
-
| | PyTorch 2.1 | PyTorch 2.2 | PyTorch 2.3 |
|
82
|
-
|
83
|
-
| Python 3.9 | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch21.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch22.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch23.yml) |
|
84
|
-
| Python 3.10 | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch21.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch22.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch23.yml) |
|
85
|
-
| Python 3.11 | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch21.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch22.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch23.yml) |
|
81
|
+
| | PyTorch 2.1 | PyTorch 2.2 | PyTorch 2.3 | PyTorch 2.4 |
|
82
|
+
|-------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
83
|
+
| Python 3.9 | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch21.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch22.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch23.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python39_pytorch24.yml) |
|
84
|
+
| Python 3.10 | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch21.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch22.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch23.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python310_pytorch24.yml) |
|
85
|
+
| Python 3.11 | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch21.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch22.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch23.yml) | [](https://github.com/sony/model_optimization/actions/workflows/run_tests_python311_pytorch24.yml) |
|
86
86
|
|
87
87
|
|
88
88
|
|
{mct_nightly-2.2.0.20240902.511.dist-info → mct_nightly-2.2.0.20240904.449.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
|
|
1
|
-
model_compression_toolkit/__init__.py,sha256=
|
1
|
+
model_compression_toolkit/__init__.py,sha256=j0NwTQQJFkPKcEOk_ysFr8-24o0scVFQ47S0VGi7HVA,1573
|
2
2
|
model_compression_toolkit/constants.py,sha256=i4wYheBkIdQmsQA-axIpcT3YiSO1USNc-jaNiNE8w6E,3920
|
3
3
|
model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
|
4
4
|
model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
|
@@ -375,7 +375,7 @@ model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=TMus5LYJnTn
|
|
375
375
|
model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
|
376
376
|
model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=fKg-PNOhGBiL-4eySS9Fyw0GkA76Pq8jT_HbJuJ8iZU,4143
|
377
377
|
model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
|
378
|
-
model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=
|
378
|
+
model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=Lf334209uVFXuRKIFqVvq9RyEcv014Bozt1hr_O6XjQ,4447
|
379
379
|
model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256=mDWZERLwtDzqWeJUwHMVyGdlS8wPLjJ3NvZiKBP6BNA,1959
|
380
380
|
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
|
381
381
|
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=oO7WgsAHMnWoXNm_gTKAAe-Nd79mGL_m677ai-ui424,4132
|
@@ -394,33 +394,32 @@ model_compression_toolkit/ptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4d
|
|
394
394
|
model_compression_toolkit/ptq/keras/quantization_facade.py,sha256=DAAJPd6pKLgiwoJT-_u2dvVOO4Ox6IgJgfiUbnNRBwQ,10968
|
395
395
|
model_compression_toolkit/ptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
396
396
|
model_compression_toolkit/ptq/pytorch/quantization_facade.py,sha256=xHVTrm9Fyk_j4j8G1Pb97qacN_gn9cGYpsT1HXdTc1A,9305
|
397
|
-
model_compression_toolkit/qat/__init__.py,sha256=
|
397
|
+
model_compression_toolkit/qat/__init__.py,sha256=b2mURFGsvaZz_CdAD_w2I4Cdu8ZDN-2iGHMBHTKT5ws,1128
|
398
398
|
model_compression_toolkit/qat/common/__init__.py,sha256=6tLZ4R4pYP6QVztLVQC_jik2nES3l4uhML0qUxZrezk,829
|
399
|
-
model_compression_toolkit/qat/common/qat_config.py,sha256=
|
399
|
+
model_compression_toolkit/qat/common/qat_config.py,sha256=xtfVSoyELGXynHNrw86dB9FU3Inu0zwehc3wLrh7JvY,2918
|
400
400
|
model_compression_toolkit/qat/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
401
|
-
model_compression_toolkit/qat/keras/quantization_facade.py,sha256=
|
401
|
+
model_compression_toolkit/qat/keras/quantization_facade.py,sha256=VaZTqK53TOWrXebnJzoHHD99DxOgS4NzHGbmYWaajWA,17274
|
402
402
|
model_compression_toolkit/qat/keras/quantizer/__init__.py,sha256=zmYyCa25_KLCSUCGUDRslh3RCIjcRMxc_oXa54Aui-4,996
|
403
403
|
model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py,sha256=hoY3AETaLSRP7YfecZ32tyUUj-X_DHRWkV8nALYeRlY,2202
|
404
404
|
model_compression_toolkit/qat/keras/quantizer/quant_utils.py,sha256=cBULOgWUodcBO1lHevZggdTevuDYI6tQceV86U2x6DA,2543
|
405
405
|
model_compression_toolkit/qat/keras/quantizer/quantization_builder.py,sha256=HD0JIOiqnrpqj5qk6RyzuCsSGZsDUVohdCYSePmJBNQ,5872
|
406
406
|
model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
|
407
|
-
model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=
|
408
|
-
model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py,sha256=
|
407
|
+
model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=MwHo4qUYTm-cZZ9f4bEDU2fcdO1VdLXcrp8MKhJ051k,12043
|
408
|
+
model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py,sha256=lGMJF_8jgHV2Rp97aMIqt7B7Gn7JsEOVbBW55K9tvuI,11244
|
409
409
|
model_compression_toolkit/qat/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
410
|
-
model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=
|
411
|
-
model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py,sha256=
|
410
|
+
model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=fPAC49mBlB5ViaQT_xHUTC8EvH84OsBX3WAPusqYcM8,13538
|
411
|
+
model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py,sha256=6YS0v1qCq5dRqtLKHc2gHaKJWfql84TxtZ7pypaZock,10810
|
412
412
|
model_compression_toolkit/qat/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
413
413
|
model_compression_toolkit/qat/pytorch/quantization_facade.py,sha256=1eg0jMgFzRLYIFnG9GJnJ8U3W4IOM-4Z27s9Wq-JeOQ,13452
|
414
414
|
model_compression_toolkit/qat/pytorch/quantizer/__init__.py,sha256=xYa4C8pr9cG1f3mQQcBXO_u3IdJN-zl7leZxuXDs86w,1003
|
415
|
-
model_compression_toolkit/qat/pytorch/quantizer/
|
416
|
-
model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py,sha256=
|
417
|
-
model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py,sha256=nO7IrDRo5b9Asf21WJacE4vf5voD3UzF_oGjBoGusD4,5335
|
415
|
+
model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_weight_quantizer.py,sha256=gjzrnBAZr5c_OrDpSjxpQYa_jKImv7ll52cng07_2oE,1813
|
416
|
+
model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py,sha256=lM10cGUkkTDtRyLLdWj5Rk0cgvcxp0uaCseyvrnk_Vg,5752
|
418
417
|
model_compression_toolkit/qat/pytorch/quantizer/lsq/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
419
|
-
model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py,sha256=
|
420
|
-
model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py,sha256=
|
418
|
+
model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py,sha256=VQuS8v-i_dm4koL-gTotoZzeUxveY4dLBuzayUGa7IE,5943
|
419
|
+
model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py,sha256=cOxqop4zZbEBL-sfw0diUDd7WJortGwZPnmlL5-3H7k,5590
|
421
420
|
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
|
422
|
-
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py,sha256=
|
423
|
-
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py,sha256=
|
421
|
+
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py,sha256=rcYI_qCz_f38VDJ6uZDwDdvvqqpv43vnR8-_zZ4j4CY,6229
|
422
|
+
model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py,sha256=btk1V6-wG7-rkOJwUF4BuKcxpvPEIrlEOg27JtLj-vE,5543
|
424
423
|
model_compression_toolkit/target_platform_capabilities/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
|
425
424
|
model_compression_toolkit/target_platform_capabilities/constants.py,sha256=iJXGy5um7vhC84Me4ld6EHMhy7jPks0T9ItZX23si6s,1519
|
426
425
|
model_compression_toolkit/target_platform_capabilities/immutable.py,sha256=YhROBiXEIB3TU-bAFrnL3qbAsb1yuWPBAQ_CLOJbYUU,1827
|
@@ -485,22 +484,32 @@ model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/
|
|
485
484
|
model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py,sha256=rxDkISGCxTB2RaVm59zJWxaJKxGgt4uceDgQ_9E_RmI,10033
|
486
485
|
model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py,sha256=-4vNf2Q6c_rgaac19AFO8hG4ANaPfgNPf0kN44mL6TQ,6830
|
487
486
|
model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py,sha256=YVJJvqGPBdkKnug99p9bjqtbfecDXZKIB2iWVCe7RUY,5960
|
488
|
-
model_compression_toolkit/trainable_infrastructure/__init__.py,sha256=
|
487
|
+
model_compression_toolkit/trainable_infrastructure/__init__.py,sha256=uewpvlPkH9mBFt8IxoAgIfz6iEcvWbOImm_fb6_BxD8,1543
|
489
488
|
model_compression_toolkit/trainable_infrastructure/common/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
490
|
-
model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py,sha256=
|
489
|
+
model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py,sha256=i5ZX0UnSt_XAgxGyyd7ZRHcocuwTh_FxWgGD2qN7zFc,7735
|
491
490
|
model_compression_toolkit/trainable_infrastructure/common/constants.py,sha256=HN120boJxAnEXNrLSj-o_s-VX4o6C-1ap_KZ4840sd0,875
|
492
491
|
model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py,sha256=Jxd4IjS_t0FwnA_S_WmZeVbh4VM6Da9ahKGPLp6ZhQo,6983
|
493
492
|
model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py,sha256=KoX-6LJMsRzXy0i72ve4buJ32cGNQVHVLqHJxhv0lPQ,3428
|
494
493
|
model_compression_toolkit/trainable_infrastructure/common/quant_utils.py,sha256=zdiew1jwR7tUKm9XWlHnAPxIZsAdKqbzzC2vH02j5wA,1505
|
495
494
|
model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py,sha256=My5Wz34jPOyh8z33OTpKnOobRB0cpO_Qgmtsd5lizHo,4791
|
495
|
+
model_compression_toolkit/trainable_infrastructure/common/training_method.py,sha256=LUoeJkloowhZKuHTiOfzjmSUn2G-4of11-rbnL-h0P4,1194
|
496
496
|
model_compression_toolkit/trainable_infrastructure/keras/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
497
|
-
model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py,sha256=
|
497
|
+
model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py,sha256=tHEI9vkLjBzdeCD7eTgAHuUubmnq8GbWSF7Coun8zzE,4116
|
498
498
|
model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py,sha256=txdWXdZoHazg-3MDPb9P-oXRM92LRn2G_8woEplwKaI,4360
|
499
499
|
model_compression_toolkit/trainable_infrastructure/keras/load_model.py,sha256=DJHibcLo-UCuHV6UPLeVd7dKmPfkGXEiLqCCqvQrISM,3769
|
500
|
-
model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha256=
|
500
|
+
model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py,sha256=eVB5FSE3OmTLrhfLUcP2knwN1z2_unQLM-xFEGwdafA,5587
|
501
501
|
model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py,sha256=MVwXNymmFRB2NXIBx4e2mdJ1RfoHxRPYRgjb1MQP5kY,1797
|
502
502
|
model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
|
503
|
-
model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=
|
503
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=7ZFf_E8nFao5f38Qk4-GzGxHgrKTHGj-4ohgPzq2Z7k,2304
|
504
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/quantizer_utils.py,sha256=1yOXKghUYfw2hmzbqTuNagIXBoM-wR2bP-ul66-mnDw,7767
|
505
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py,sha256=73CXhqqNTvDpsvlJXclrGJq-vsCUYCI64ILu1y2mtvw,1056
|
506
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py,sha256=X6E6mewWQot_aAkz3UxW5X0-Fjl_aMMjs3A-Af5eL6w,972
|
507
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
|
508
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/symmetric_lsq.py,sha256=0UGoFHAR-RP9aFbAOILbM8kAG9OwUJJZ_g3Rz58SGlY,5462
|
509
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/uniform_lsq.py,sha256=BPeunWrYNmbduZGXiZKy5t1ubYREX7QqWOXv2Dt85lk,5285
|
510
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
|
511
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/symmetric_ste.py,sha256=20DEZgn6ZepcjKrATvciaiQNs2VGf5uwF6f6hDJLOVo,5226
|
512
|
+
model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/ste/uniform_ste.py,sha256=1XHClqM7EhNvYiH6sqs6OI3JUGPfjW55v2eQotVwy8c,5010
|
504
513
|
model_compression_toolkit/xquant/__init__.py,sha256=vdmr8sQw3jIBLF9ck7qrskPoXzDKtksHWlMOkU1JUnQ,1003
|
505
514
|
model_compression_toolkit/xquant/common/__init__.py,sha256=ycb1Xt7PtixY2Uabr94JGSwBMcct66O8ZMVf3Qa3ud8,719
|
506
515
|
model_compression_toolkit/xquant/common/constants.py,sha256=k-9LOEv1n_m8dV4chX0dNOTWyhhF7S00E0lkUxtO84E,1592
|
@@ -527,8 +536,8 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
|
|
527
536
|
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=bOc-hFL3gdoSM1Th_S2N_-9JJSlPGpZCTx_QLJHS6lg,3388
|
528
537
|
model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
|
529
538
|
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
|
530
|
-
mct_nightly-2.2.0.
|
531
|
-
mct_nightly-2.2.0.
|
532
|
-
mct_nightly-2.2.0.
|
533
|
-
mct_nightly-2.2.0.
|
534
|
-
mct_nightly-2.2.0.
|
539
|
+
mct_nightly-2.2.0.20240904.449.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
|
540
|
+
mct_nightly-2.2.0.20240904.449.dist-info/METADATA,sha256=SeHK4yipNqQZ45k1ilwb4IdW_j6-k20YV1ewTWUnZVg,20813
|
541
|
+
mct_nightly-2.2.0.20240904.449.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
542
|
+
mct_nightly-2.2.0.20240904.449.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
|
543
|
+
mct_nightly-2.2.0.20240904.449.dist-info/RECORD,,
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
27
27
|
from model_compression_toolkit import pruning
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
29
29
|
|
30
|
-
__version__ = "2.2.0.20240902.000511"
|
30
|
+
__version__ = "2.2.0.20240904.000449"
|
@@ -27,7 +27,6 @@ from mct_quantizers.pytorch.quantizers import BasePyTorchInferableQuantizer
|
|
27
27
|
from model_compression_toolkit.logger import Logger
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.common.get_quantizer_config import \
|
29
29
|
get_trainable_quantizer_weights_config
|
30
|
-
from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_quantizer import BasePytorchQATTrainableQuantizer
|
31
30
|
from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import \
|
32
31
|
get_trainable_quantizer_class
|
33
32
|
|
@@ -35,7 +34,7 @@ from model_compression_toolkit.trainable_infrastructure.common.get_quantizers im
|
|
35
34
|
def quantization_builder(n: common.BaseNode,
|
36
35
|
gptq_config: GradientPTQConfig,
|
37
36
|
kernel_attr: str = None
|
38
|
-
) -> Tuple[Dict[str,
|
37
|
+
) -> Tuple[Dict[str, BasePytorchGPTQTrainableQuantizer], List[BasePyTorchInferableQuantizer]]:
|
39
38
|
"""
|
40
39
|
Build quantizers for a node according to its quantization configuration and
|
41
40
|
a global NoOpQuantizeConfig object.
|
@@ -12,7 +12,7 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
-
from model_compression_toolkit.qat.common.qat_config import QATConfig
|
15
|
+
from model_compression_toolkit.qat.common.qat_config import QATConfig
|
16
16
|
|
17
17
|
from model_compression_toolkit.qat.keras.quantization_facade import keras_quantization_aware_training_init_experimental, keras_quantization_aware_training_finalize_experimental
|
18
|
-
from model_compression_toolkit.qat.pytorch.quantization_facade import pytorch_quantization_aware_training_init_experimental, pytorch_quantization_aware_training_finalize_experimental
|
18
|
+
from model_compression_toolkit.qat.pytorch.quantization_facade import pytorch_quantization_aware_training_init_experimental, pytorch_quantization_aware_training_finalize_experimental
|
@@ -14,10 +14,9 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
|
16
16
|
from typing import Dict
|
17
|
-
from enum import Enum
|
18
17
|
from model_compression_toolkit.core import common
|
19
18
|
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
|
20
|
-
from model_compression_toolkit.
|
19
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
21
20
|
|
22
21
|
|
23
22
|
def is_qat_applicable(node: common.BaseNode,
|
@@ -38,23 +37,6 @@ def is_qat_applicable(node: common.BaseNode,
|
|
38
37
|
or node.is_activation_quantization_enabled()
|
39
38
|
|
40
39
|
|
41
|
-
|
42
|
-
class TrainingMethod(Enum):
|
43
|
-
"""
|
44
|
-
An enum for selecting a QAT training method
|
45
|
-
|
46
|
-
STE - Standard straight-through estimator. Includes PowerOfTwo, symmetric & uniform quantizers
|
47
|
-
|
48
|
-
DQA - DNN Quantization with Attention. Includes a smooth quantization introduces by DQA method
|
49
|
-
|
50
|
-
LSQ - Learned Step size Quantization. Includes PowerOfTwo, symmetric & uniform quantizers: https://arxiv.org/pdf/1902.08153.pdf
|
51
|
-
|
52
|
-
"""
|
53
|
-
STE = "STE",
|
54
|
-
DQA = "DQA",
|
55
|
-
LSQ = "LSQ"
|
56
|
-
|
57
|
-
|
58
40
|
class QATConfig:
|
59
41
|
"""
|
60
42
|
QAT configuration class.
|
@@ -24,7 +24,6 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
|
|
24
24
|
from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
|
25
25
|
MixedPrecisionQuantizationConfig
|
26
26
|
from mct_quantizers import KerasActivationQuantizationHolder
|
27
|
-
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
28
27
|
from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
|
29
28
|
from model_compression_toolkit.core.runner import core_runner
|
30
29
|
from model_compression_toolkit.ptq.runner import ptq_runner
|
@@ -34,6 +33,7 @@ if FOUND_TF:
|
|
34
33
|
from tensorflow.keras.layers import Layer
|
35
34
|
from tensorflow.keras.models import Model
|
36
35
|
|
36
|
+
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
37
37
|
from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
|
38
38
|
from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
|
39
39
|
from model_compression_toolkit.core.keras.keras_model_validation import KerasModelValidation
|
@@ -20,7 +20,7 @@ import tensorflow as tf
|
|
20
20
|
from tensorflow.python.framework.tensor_shape import TensorShape
|
21
21
|
from model_compression_toolkit.constants import SIGNED
|
22
22
|
|
23
|
-
from model_compression_toolkit.
|
23
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
24
24
|
|
25
25
|
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
26
26
|
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
@@ -18,7 +18,7 @@ from tensorflow.python.framework.tensor_shape import TensorShape
|
|
18
18
|
from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX
|
19
19
|
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
20
20
|
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
21
|
-
from model_compression_toolkit.
|
21
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
22
22
|
|
23
23
|
from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget
|
24
24
|
from mct_quantizers.keras.quantizers import \
|
@@ -21,7 +21,7 @@ from tensorflow.python.framework.tensor_shape import TensorShape
|
|
21
21
|
from model_compression_toolkit.constants import SIGNED
|
22
22
|
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
23
23
|
|
24
|
-
from model_compression_toolkit.
|
24
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
25
25
|
|
26
26
|
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
27
27
|
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
@@ -18,7 +18,7 @@ from tensorflow.python.framework.tensor_shape import TensorShape
|
|
18
18
|
from model_compression_toolkit.constants import RANGE_MIN, RANGE_MAX
|
19
19
|
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
20
20
|
from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
|
21
|
-
from model_compression_toolkit.
|
21
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
22
22
|
|
23
23
|
from mct_quantizers import mark_quantizer, QuantizationMethod, QuantizationTarget
|
24
24
|
from mct_quantizers.keras.quantizers import \
|
@@ -24,23 +24,14 @@ from model_compression_toolkit.trainable_infrastructure.pytorch.base_pytorch_qua
|
|
24
24
|
|
25
25
|
if FOUND_TORCH:
|
26
26
|
|
27
|
-
class
|
27
|
+
class BasePytorchQATWeightTrainableQuantizer(BasePytorchTrainableQuantizer):
|
28
28
|
"""
|
29
|
-
A base class for trainable
|
29
|
+
A base class for trainable PyTorch weights quantizer for QAT.
|
30
30
|
"""
|
31
|
-
|
32
|
-
def __init__(self,
|
33
|
-
quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
|
34
|
-
"""
|
35
|
-
Initializes BasePytorchQATTrainableQuantizer object.
|
36
|
-
|
37
|
-
Args:
|
38
|
-
quantization_config: quantizer config class contains all the information about a quantizer configuration.
|
39
|
-
"""
|
40
|
-
super().__init__(quantization_config)
|
31
|
+
pass
|
41
32
|
|
42
33
|
else: # pragma: no cover
|
43
|
-
class
|
34
|
+
class BasePytorchQATWeightTrainableQuantizer(BasePytorchTrainableQuantizer):
|
44
35
|
def __init__(self,
|
45
36
|
quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
|
46
37
|
super().__init__(quantization_config)
|
@@ -18,56 +18,27 @@ import numpy as np
|
|
18
18
|
import torch
|
19
19
|
import torch.nn as nn
|
20
20
|
|
21
|
-
from model_compression_toolkit.qat import TrainingMethod
|
22
21
|
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
23
22
|
from mct_quantizers import PytorchQuantizationWrapper
|
24
23
|
from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
|
25
24
|
from model_compression_toolkit import constants as C
|
26
|
-
from model_compression_toolkit.qat.pytorch.quantizer.
|
25
|
+
from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_weight_quantizer import BasePytorchQATWeightTrainableQuantizer
|
27
26
|
from mct_quantizers.common.base_inferable_quantizer import mark_quantizer, QuantizationTarget
|
28
27
|
|
29
28
|
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
|
30
|
-
from model_compression_toolkit.
|
29
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
30
|
+
from model_compression_toolkit.trainable_infrastructure.pytorch.quantizer_utils import symmetric_lsq_quantizer
|
31
31
|
from mct_quantizers.pytorch.quantizers import \
|
32
|
-
WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
|
33
|
-
ActivationSymmetricInferableQuantizer
|
32
|
+
WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
|
34
33
|
from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
|
35
|
-
TrainableQuantizerWeightsConfig
|
34
|
+
TrainableQuantizerWeightsConfig
|
36
35
|
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
37
36
|
|
38
37
|
|
39
|
-
def symmetric_lsq_quantizer(x: nn.Parameter,
|
40
|
-
thresholds: nn.Parameter,
|
41
|
-
num_bits: int,
|
42
|
-
sign: bool,
|
43
|
-
min_int: int,
|
44
|
-
max_int: int,
|
45
|
-
scale_factor: float) -> Union[nn.Parameter, torch.Tensor]:
|
46
|
-
"""
|
47
|
-
Symmetric quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
|
48
|
-
Args:
|
49
|
-
x: input to quantize
|
50
|
-
thresholds: thresholds of quantization levels
|
51
|
-
num_bits: number of bits for quantization
|
52
|
-
sign: whether x is signed or not
|
53
|
-
min_int: min clipping integer value
|
54
|
-
max_int: max clipping integer value
|
55
|
-
scale_factor: grad scale of LSQ algorithm
|
56
|
-
Returns:
|
57
|
-
A quantized tensor
|
58
|
-
"""
|
59
|
-
delta = thresholds / (2 ** (num_bits - int(sign)))
|
60
|
-
delta_scaled = grad_scale(delta, scale_factor)
|
61
|
-
rounded = ste_round(x / delta_scaled)
|
62
|
-
clipped = torch.clip(rounded, min=min_int, max=max_int)
|
63
|
-
quantized = delta_scaled * clipped
|
64
|
-
return quantized
|
65
|
-
|
66
|
-
|
67
38
|
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
|
68
39
|
quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
|
69
40
|
identifier=TrainingMethod.LSQ)
|
70
|
-
class LSQWeightQATQuantizer(
|
41
|
+
class LSQWeightQATQuantizer(BasePytorchQATWeightTrainableQuantizer):
|
71
42
|
"""
|
72
43
|
Trainable constrained quantizer to quantize layer's weights.
|
73
44
|
"""
|
@@ -145,84 +116,3 @@ class LSQWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
|
|
145
116
|
threshold=threshold_values.tolist(),
|
146
117
|
per_channel=self.quantization_config.weights_per_channel_threshold,
|
147
118
|
channel_axis=self.quantization_config.weights_channels_axis)
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
|
152
|
-
quantization_method=[QuantizationMethod.POWER_OF_TWO, QuantizationMethod.SYMMETRIC],
|
153
|
-
identifier=TrainingMethod.LSQ)
|
154
|
-
class LSQActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
|
155
|
-
"""
|
156
|
-
Trainable constrained quantizer to quantize layer activations.
|
157
|
-
"""
|
158
|
-
|
159
|
-
def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
|
160
|
-
"""
|
161
|
-
Initialize a LSQActivationQATQuantizer object with parameters to use
|
162
|
-
for symmetric or power of two quantization.
|
163
|
-
|
164
|
-
Args:
|
165
|
-
quantization_config: trainable quantizer config class
|
166
|
-
"""
|
167
|
-
super().__init__(quantization_config)
|
168
|
-
self.power_of_two = quantization_config.activation_quantization_method == QuantizationMethod.POWER_OF_TWO
|
169
|
-
self.sign = quantization_config.activation_quantization_params['is_signed']
|
170
|
-
self.threshold_values = np.array([quantization_config.activation_quantization_params[C.THRESHOLD]])
|
171
|
-
self.num_bits = quantization_config.activation_n_bits
|
172
|
-
n_pos_bits = self.num_bits - int(self.sign)
|
173
|
-
self.min_int = -int(self.sign) * (2 ** n_pos_bits)
|
174
|
-
self.max_int = (2 ** n_pos_bits) - 1
|
175
|
-
|
176
|
-
def initialize_quantization(self,
|
177
|
-
tensor_shape: torch.Size,
|
178
|
-
name: str,
|
179
|
-
layer: PytorchQuantizationWrapper):
|
180
|
-
"""
|
181
|
-
Add quantizer parameters to the quantizer parameters dictionary
|
182
|
-
|
183
|
-
Args:
|
184
|
-
tensor_shape: tensor shape of the quantized tensor.
|
185
|
-
name: Tensor name.
|
186
|
-
layer: Layer to quantize.
|
187
|
-
"""
|
188
|
-
layer.register_parameter(name, nn.Parameter(to_torch_tensor(self.threshold_values), requires_grad=True))
|
189
|
-
|
190
|
-
# save the quantizer added parameters for later calculations
|
191
|
-
self.add_quantizer_variable(THRESHOLD_TENSOR, layer.get_parameter(name), VariableGroup.QPARAMS)
|
192
|
-
|
193
|
-
def __call__(self,
|
194
|
-
inputs: torch.Tensor,
|
195
|
-
training: bool = True) -> torch.Tensor:
|
196
|
-
"""
|
197
|
-
Quantize a tensor.
|
198
|
-
Args:
|
199
|
-
inputs: Input tensor to quantize.
|
200
|
-
training: Whether the graph is in training mode.
|
201
|
-
|
202
|
-
Returns:
|
203
|
-
The quantized tensor.
|
204
|
-
"""
|
205
|
-
|
206
|
-
thresholds = self.get_quantizer_variable(THRESHOLD_TENSOR)
|
207
|
-
n_channels = inputs.shape[1]
|
208
|
-
scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
|
209
|
-
inputs_quantized = symmetric_lsq_quantizer(inputs, thresholds, self.num_bits, self.sign, self.min_int, self.max_int, scale_factor)
|
210
|
-
return inputs_quantized
|
211
|
-
|
212
|
-
def convert2inferable(self) -> Union[ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer]:
|
213
|
-
"""
|
214
|
-
Convert quantizer to inferable quantizer.
|
215
|
-
|
216
|
-
Returns:
|
217
|
-
A pytorch inferable quanizer object.
|
218
|
-
"""
|
219
|
-
threshold_values = self.get_quantizer_variable(THRESHOLD_TENSOR).cpu().detach().numpy()
|
220
|
-
if self.power_of_two:
|
221
|
-
pot_threshold = np.power(2.0, np.ceil(np.log2(threshold_values)))
|
222
|
-
return ActivationPOTInferableQuantizer(num_bits=self.num_bits,
|
223
|
-
threshold=pot_threshold.tolist(),
|
224
|
-
signed=self.sign)
|
225
|
-
else:
|
226
|
-
return ActivationSymmetricInferableQuantizer(num_bits=self.num_bits,
|
227
|
-
threshold=threshold_values.tolist(),
|
228
|
-
signed=self.sign)
|
@@ -12,66 +12,32 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
# ==============================================================================
|
15
|
-
from typing import Union
|
16
15
|
import numpy as np
|
17
16
|
import torch
|
18
17
|
import torch.nn as nn
|
19
18
|
|
20
|
-
from model_compression_toolkit.constants import RANGE_MAX, RANGE_MIN
|
21
|
-
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
22
|
-
|
23
|
-
from model_compression_toolkit.qat import TrainingMethod
|
24
|
-
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
25
19
|
from mct_quantizers import QuantizationTarget, PytorchQuantizationWrapper
|
26
|
-
from model_compression_toolkit import constants as C
|
27
|
-
|
28
|
-
from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_quantizer import BasePytorchQATTrainableQuantizer
|
29
20
|
from mct_quantizers import mark_quantizer
|
30
|
-
from model_compression_toolkit.qat.pytorch.quantizer.quantizer_utils import ste_round, grad_scale
|
31
|
-
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
|
32
21
|
from mct_quantizers.pytorch.quantizers import \
|
33
|
-
WeightsUniformInferableQuantizer
|
34
|
-
|
35
|
-
|
22
|
+
WeightsUniformInferableQuantizer
|
23
|
+
|
24
|
+
from model_compression_toolkit.constants import RANGE_MAX, RANGE_MIN
|
25
|
+
from model_compression_toolkit.trainable_infrastructure.common.constants import FQ_MIN, FQ_MAX
|
26
|
+
from model_compression_toolkit.trainable_infrastructure import TrainingMethod
|
27
|
+
from model_compression_toolkit.trainable_infrastructure.pytorch.quantizer_utils import uniform_lsq_quantizer
|
36
28
|
from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
|
37
|
-
from model_compression_toolkit.
|
29
|
+
from model_compression_toolkit.trainable_infrastructure.common.trainable_quantizer_config import \
|
30
|
+
TrainableQuantizerWeightsConfig
|
31
|
+
from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
|
32
|
+
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
|
38
33
|
from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import fix_range_to_include_zero
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
def uniform_lsq_quantizer(x: nn.Parameter,
|
43
|
-
min_range: nn.Parameter,
|
44
|
-
max_range: nn.Parameter,
|
45
|
-
num_bits: int,
|
46
|
-
min_int: int,
|
47
|
-
max_int: int,
|
48
|
-
scale_factor: float) -> Union[nn.Parameter, torch.Tensor]:
|
49
|
-
"""
|
50
|
-
Uniform quantizer according to LSQ algorithm: https://arxiv.org/pdf/1902.08153.pdf
|
51
|
-
Args:
|
52
|
-
x: input to quantize
|
53
|
-
min_range: min range of quantization values
|
54
|
-
max_range: min range of quantization values
|
55
|
-
num_bits: number of bits for quantization
|
56
|
-
min_int: min clipping integer value
|
57
|
-
max_int: max clipping integer value
|
58
|
-
scale_factor: grad scale of LSQ algorithm
|
59
|
-
Returns:
|
60
|
-
A quantized tensor
|
61
|
-
"""
|
62
|
-
a, b = adjust_range_to_include_zero(min_range, max_range, num_bits)
|
63
|
-
delta = (b - a) / (2 ** num_bits - 1)
|
64
|
-
delta_scaled = grad_scale(delta, scale_factor)
|
65
|
-
rounded = ste_round((x - a) / delta_scaled)
|
66
|
-
clipped = torch.clip(rounded, min=min_int, max=max_int)
|
67
|
-
quantized = delta_scaled * clipped + a
|
68
|
-
return quantized
|
34
|
+
from model_compression_toolkit.qat.pytorch.quantizer.base_pytorch_qat_weight_quantizer import BasePytorchQATWeightTrainableQuantizer
|
69
35
|
|
70
36
|
|
71
37
|
@mark_quantizer(quantization_target=QuantizationTarget.Weights,
|
72
38
|
quantization_method=[QuantizationMethod.UNIFORM],
|
73
39
|
identifier=TrainingMethod.LSQ)
|
74
|
-
class LSQUniformWeightQATQuantizer(
|
40
|
+
class LSQUniformWeightQATQuantizer(BasePytorchQATWeightTrainableQuantizer):
|
75
41
|
"""
|
76
42
|
Trainable constrained quantizer to quantize layer's weights.
|
77
43
|
"""
|
@@ -145,79 +111,3 @@ class LSQUniformWeightQATQuantizer(BasePytorchQATTrainableQuantizer):
|
|
145
111
|
max_range=max_range.tolist(),
|
146
112
|
per_channel=self.quantization_config.weights_per_channel_threshold,
|
147
113
|
channel_axis=self.quantization_config.weights_channels_axis)
|
148
|
-
|
149
|
-
|
150
|
-
@mark_quantizer(quantization_target=QuantizationTarget.Activation,
|
151
|
-
quantization_method=[QuantizationMethod.UNIFORM],
|
152
|
-
identifier=TrainingMethod.LSQ)
|
153
|
-
class LSQUniformActivationQATQuantizer(BasePytorchQATTrainableQuantizer):
|
154
|
-
"""
|
155
|
-
Trainable constrained quantizer to quantize layer activations.
|
156
|
-
"""
|
157
|
-
|
158
|
-
def __init__(self, quantization_config: TrainableQuantizerActivationConfig):
|
159
|
-
"""
|
160
|
-
Initialize a LSQUniformActivationQATQuantizer object with parameters to use
|
161
|
-
for uniform quantization.
|
162
|
-
|
163
|
-
Args:
|
164
|
-
quantization_config: trainable quantizer config class
|
165
|
-
"""
|
166
|
-
super().__init__(quantization_config)
|
167
|
-
self.num_bits = self.quantization_config.activation_n_bits
|
168
|
-
self.min_int = 0
|
169
|
-
self.max_int = 2 ** self.num_bits - 1
|
170
|
-
self.min_range = np.array([quantization_config.activation_quantization_params[C.RANGE_MIN]])
|
171
|
-
self.max_range = np.array([quantization_config.activation_quantization_params[C.RANGE_MAX]])
|
172
|
-
|
173
|
-
def initialize_quantization(self,
|
174
|
-
tensor_shape: torch.Size,
|
175
|
-
name: str,
|
176
|
-
layer: PytorchQuantizationWrapper):
|
177
|
-
"""
|
178
|
-
Add quantizer parameters to the quantizer parameters dictionary
|
179
|
-
|
180
|
-
Args:
|
181
|
-
tensor_shape: tensor shape of the quantized tensor.
|
182
|
-
name: Tensor name.
|
183
|
-
layer: Layer to quantize.
|
184
|
-
"""
|
185
|
-
layer.register_parameter(name+"_"+FQ_MIN, nn.Parameter(to_torch_tensor(self.min_range), requires_grad=True))
|
186
|
-
layer.register_parameter(name+"_"+FQ_MAX, nn.Parameter(to_torch_tensor(self.max_range), requires_grad=True))
|
187
|
-
|
188
|
-
# Save the quantizer parameters for later calculations
|
189
|
-
self.add_quantizer_variable(FQ_MIN, layer.get_parameter(name+"_"+FQ_MIN), VariableGroup.QPARAMS)
|
190
|
-
self.add_quantizer_variable(FQ_MAX, layer.get_parameter(name+"_"+FQ_MAX), VariableGroup.QPARAMS)
|
191
|
-
|
192
|
-
def __call__(self,
|
193
|
-
inputs: torch.Tensor,
|
194
|
-
training: bool = True) -> torch.Tensor:
|
195
|
-
"""
|
196
|
-
Quantize a tensor.
|
197
|
-
Args:
|
198
|
-
inputs: Input tensor to quantize.
|
199
|
-
training: Whether the graph is in training mode.
|
200
|
-
|
201
|
-
Returns:
|
202
|
-
The quantized tensor.
|
203
|
-
"""
|
204
|
-
min_range = self.get_quantizer_variable(FQ_MIN)
|
205
|
-
max_range = self.get_quantizer_variable(FQ_MAX)
|
206
|
-
n_channels = inputs.shape[1]
|
207
|
-
scale_factor = 1.0 / np.sqrt(self.max_int * n_channels)
|
208
|
-
inputs_quantized = uniform_lsq_quantizer(inputs, min_range, max_range, self.num_bits, self.min_int, self.max_int, scale_factor)
|
209
|
-
return inputs_quantized
|
210
|
-
|
211
|
-
def convert2inferable(self) -> ActivationUniformInferableQuantizer:
|
212
|
-
"""
|
213
|
-
Convert quantizer to inferable quantizer.
|
214
|
-
|
215
|
-
Returns:
|
216
|
-
A pytorch inferable quanizer object.
|
217
|
-
"""
|
218
|
-
min_range = self.get_quantizer_variable(FQ_MIN).cpu().detach().numpy()
|
219
|
-
max_range = self.get_quantizer_variable(FQ_MAX).cpu().detach().numpy()
|
220
|
-
min_range, max_range = fix_range_to_include_zero(min_range, max_range, self.num_bits)
|
221
|
-
return ActivationUniformInferableQuantizer(num_bits=self.num_bits,
|
222
|
-
min_range=min_range.tolist(),
|
223
|
-
max_range=max_range.tolist())
|