mct-nightly 2.2.0.20241025.505__py3-none-any.whl → 2.2.0.20241027.532__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/METADATA +1 -1
- {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/RECORD +26 -21
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/gptq/common/gptq_constants.py +8 -1
- model_compression_toolkit/gptq/{pytorch/quantizer → common}/gradual_activation_quantization.py +10 -10
- model_compression_toolkit/gptq/{pytorch/quantizer → common}/regularization_factory.py +25 -11
- model_compression_toolkit/gptq/keras/gptq_training.py +26 -11
- model_compression_toolkit/gptq/keras/quantization_facade.py +35 -24
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +10 -9
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +5 -45
- model_compression_toolkit/gptq/pytorch/gptq_training.py +13 -9
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +3 -13
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +2 -4
- model_compression_toolkit/trainable_infrastructure/common/annealing_schedulers.py +68 -0
- model_compression_toolkit/trainable_infrastructure/keras/annealing_schedulers.py +32 -0
- model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py +10 -18
- tests_pytest/keras/gptq/__init__.py +14 -0
- tests_pytest/keras/gptq/test_gradual_act_quantization.py +102 -0
- tests_pytest/keras/trainable_infrastructure/__init__.py +16 -0
- tests_pytest/keras/trainable_infrastructure/test_linear_annealing.py +49 -0
- tests_pytest/pytorch/gptq/test_gradual_act_quantization.py +4 -4
- tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py +4 -4
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +0 -44
- {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/WHEEL +0 -0
- {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/trainable_infrastructure/{pytorch → common}/util.py +0 -0
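The headline change: gradual activation quantization and the soft-rounding regularization factory move into gptq/common and become framework-agnostic, and the feature is now exposed through the Keras GPTQ facade as well. A minimal usage sketch, assuming the new gradual_activation_quantization argument shown in the Keras facade diff below:

>>> import model_compression_toolkit as mct
>>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5, gradual_activation_quantization=True)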
{mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
- model_compression_toolkit/__init__.py,sha256=
+ model_compression_toolkit/__init__.py,sha256=e7Hzdcg92ykZG1qmS--aAUq-lsyc1Xp_WN__EKEbiDk,1573
  model_compression_toolkit/constants.py,sha256=i4wYheBkIdQmsQA-axIpcT3YiSO1USNc-jaNiNE8w6E,3920
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
  model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -347,23 +347,24 @@ model_compression_toolkit/gptq/__init__.py,sha256=pEgkJvmf05KSw70iLDTz_6LI_2Oi5L
  model_compression_toolkit/gptq/runner.py,sha256=La12JTYjWyJW0YW4Al4TP1_Xi4JWBCEKw6FR_JQsxe0,5982
  model_compression_toolkit/gptq/common/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/common/gptq_config.py,sha256=Z6T5B3q4k2Tlr2bBWvC6TAF3d2opyA7ZT_D_mz6D1_0,6297
- model_compression_toolkit/gptq/common/gptq_constants.py,sha256=
+ model_compression_toolkit/gptq/common/gptq_constants.py,sha256=D1x2n4-NdAx6g_1Wc2hwwh4vX9vmx5VnQWN26H107kg,766
  model_compression_toolkit/gptq/common/gptq_framework_implementation.py,sha256=n3mSf4J92kFjekzyGyrJULylI-8Jf5OVWJ5AFoVnEx0,1266
  model_compression_toolkit/gptq/common/gptq_graph.py,sha256=-bL5HhPcKqV8nj4dZPXc5QmQJbFBel6etrioikP0tEo,3039
  model_compression_toolkit/gptq/common/gptq_training.py,sha256=tt4O8PjSChquzl4c6NojvQWZmvCdTxcMLtmEVIGx1ns,13252
+ model_compression_toolkit/gptq/common/gradual_activation_quantization.py,sha256=EgpzMs_aDoB0wQiTagqvcxCTfrgNUuCfdXEXmfNiyb0,3780
+ model_compression_toolkit/gptq/common/regularization_factory.py,sha256=hyunpXepVeHyoAFJw6zNLK-3ZHBmiut3lmNisJN_L3E,2514
  model_compression_toolkit/gptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCSjq5xk-xGymOwSOqjp39It-CVtGcCTRTf0E_4,1248
  model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=rbRkF15MYd6nq4G49kcjb_dPTa-XNq9cTkrb93mXawo,6241
- model_compression_toolkit/gptq/keras/gptq_training.py,sha256=
+ model_compression_toolkit/gptq/keras/gptq_training.py,sha256=TEWqAU8JZnZVZ-dIkINA0x1NmSrYpEkXTdG835JdKnI,20848
  model_compression_toolkit/gptq/keras/graph_info.py,sha256=MKIfrRTRH3zCuxCR1g9ZVIFyuSSr0e0sDybqh4LDM7E,4672
- model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=
+ model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=DhEEpW0rK4JRdk5WQlN-_DOUuzlwOBqpiwTBOySjn2g,16820
  model_compression_toolkit/gptq/keras/quantizer/__init__.py,sha256=-DK1CDXvlsnEbki4lukZLpl6Xrbo91_jcqxXlG5Eg6Q,963
  model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py,sha256=Rbl9urzkmACvVxICSEyJ02qFOBxWK0UQWtysFJzBVZw,4899
  model_compression_toolkit/gptq/keras/quantizer/quant_utils.py,sha256=Vt7Qb8i4JsE4sFtcjpfM4FTXTtfV1t6SwfoNH8a_Iaw,5055
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py,sha256=
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py,sha256=guf7ygnLsZeWnTDz4yJdE2iTkd1oE0uQAZwKnGV3OAk,1957
+ model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py,sha256=rst-u5EB9Xss4ndKqi297WvZ-9RVee2TAUVFelPVKhU,4663
  model_compression_toolkit/gptq/keras/quantizer/soft_rounding/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=
+ model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=REO-pIXpT4ZuJzhizvQjz6vn7Vxnq7k0KvikuQ4FDkE,2769
  model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=BBSDWLmeywjSM5N6oJkMgcuo7zrXTesB4zLwRGG8QB0,12159
  model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py,sha256=pyhlVpoauHM-zuixHsIGPHFgQoXppL8TlDFCjPE2RuY,10377
  model_compression_toolkit/gptq/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
@@ -371,15 +372,13 @@ model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py,sha
  model_compression_toolkit/gptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=_07Zx_43bnNokwR5S8phIqeu5-_7_5VBT4DT-FCw7Do,3892
  model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
- model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=
+ model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=2KwJFlJj6hFJClsJbC9aaWDAGbZUNDbSx1d-QX4LShc,22132
  model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=4mVM-VvnBaA64ACVdOe6wTGHdMSa2UTLIUe7nACLcdo,4008
- model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=
+ model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=lY7_lNtS1SqaaJ0gc6C7_HO71bBalsxQY37QQlWpu70,15479
  model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
  model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=fKg-PNOhGBiL-4eySS9Fyw0GkA76Pq8jT_HbJuJ8iZU,4143
- model_compression_toolkit/gptq/pytorch/quantizer/gradual_activation_quantization.py,sha256=nngu2TeXjngkqt_6-wciFmCvo-dbpeh_tJJxBV_cfHk,3686
  model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
  model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=5EyAzvlU01vLyXmMwY_8dNyb7GwYktXmnrvUON8n8WI,4696
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256=H6pARLK-jq3cKoaipY0SK9wMGrqy6CSEZTk14KdrKA0,2105
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=f7B95Bx-MX-HKheqAUn1GG8cVHFI2ldFReXrUPwk2tY,3002
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=kLVQC1hXzDpP4Jx7AwnA764oGnY5AMEuvUUhAvhz09M,12347
@@ -407,7 +406,7 @@ model_compression_toolkit/qat/keras/quantizer/base_keras_qat_weight_quantizer.py
  model_compression_toolkit/qat/keras/quantizer/quant_utils.py,sha256=cBULOgWUodcBO1lHevZggdTevuDYI6tQceV86U2x6DA,2543
  model_compression_toolkit/qat/keras/quantizer/quantization_builder.py,sha256=hGizGBbOGZpD-w3wg-LlehUYJDWLk91VUdfVwwG2Z78,5882
  model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=
+ model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=ujxb7hYHj25QRwu7SScP8BZXWQdh61knzk68KRIdOp0,6501
  model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py,sha256=vGUs9b0IHTydCA5tN7iekuhf1LHNgIrSF5sXMD1WsSI,6476
  model_compression_toolkit/qat/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=pFkrjtlavCniswcO3-Djlh6a_Hz1rrcEa7Z5wTGVRCU,8270
@@ -493,6 +492,7 @@ model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/
  model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py,sha256=YVJJvqGPBdkKnug99p9bjqtbfecDXZKIB2iWVCe7RUY,5960
  model_compression_toolkit/trainable_infrastructure/__init__.py,sha256=uewpvlPkH9mBFt8IxoAgIfz6iEcvWbOImm_fb6_BxD8,1543
  model_compression_toolkit/trainable_infrastructure/common/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
+ model_compression_toolkit/trainable_infrastructure/common/annealing_schedulers.py,sha256=qm2_wa61nga08Jdcl3RkgTsJ0zyHNjZ_A6I2--oVOig,2455
  model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py,sha256=IF50ASBUvVrOVqlJ1nHNxZxKXSuCanjhUX0YjMB-rRg,7946
  model_compression_toolkit/trainable_infrastructure/common/constants.py,sha256=HN120boJxAnEXNrLSj-o_s-VX4o6C-1ap_KZ4840sd0,875
  model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py,sha256=Jxd4IjS_t0FwnA_S_WmZeVbh4VM6Da9ahKGPLp6ZhQo,6983
@@ -500,7 +500,9 @@ model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py,sha2
  model_compression_toolkit/trainable_infrastructure/common/quant_utils.py,sha256=zdiew1jwR7tUKm9XWlHnAPxIZsAdKqbzzC2vH02j5wA,1505
  model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py,sha256=My5Wz34jPOyh8z33OTpKnOobRB0cpO_Qgmtsd5lizHo,4791
  model_compression_toolkit/trainable_infrastructure/common/training_method.py,sha256=LUoeJkloowhZKuHTiOfzjmSUn2G-4of11-rbnL-h0P4,1194
+ model_compression_toolkit/trainable_infrastructure/common/util.py,sha256=oKuWi7E07a8zv5x9auhBugYE2RUQ7ojDh2XCs5koYJY,1090
  model_compression_toolkit/trainable_infrastructure/keras/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
+ model_compression_toolkit/trainable_infrastructure/keras/annealing_schedulers.py,sha256=sISNVxPsdm-Nd95PhoPSJ-2tFpINGlfrU7ZXaCByI-o,1278
  model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py,sha256=LBc26z8pkpbcdKMTxpNBg5IyChLreHQ1lRgCVjNE37o,4202
  model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py,sha256=txdWXdZoHazg-3MDPb9P-oXRM92LRn2G_8woEplwKaI,4360
  model_compression_toolkit/trainable_infrastructure/keras/load_model.py,sha256=DJHibcLo-UCuHV6UPLeVd7dKmPfkGXEiLqCCqvQrISM,3769
@@ -515,10 +517,9 @@ model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/s
  model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py,sha256=THY5eZ_69D1yzkXLhLg84ON_deNUAD_qMJ6A5C5znDM,7359
  model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py,sha256=XEypYorBnSBLj6sh1pHCNaSjeCToYVlERWIHxUoXvuc,5733
  model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
- model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py,sha256=
+ model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py,sha256=W5NPQiwIAd2dpaoU9WfRwSt0ljrrePj5lwPk6d1yVwQ,1333
  model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=lWc5EG3ptrP85n69EHGKFkIadnrKEBMKnB5YXQ5AmXo,2745
  model_compression_toolkit/trainable_infrastructure/pytorch/quantizer_utils.py,sha256=1yOXKghUYfw2hmzbqTuNagIXBoM-wR2bP-ul66-mnDw,7767
- model_compression_toolkit/trainable_infrastructure/pytorch/util.py,sha256=oKuWi7E07a8zv5x9auhBugYE2RUQ7ojDh2XCs5koYJY,1090
  model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py,sha256=73CXhqqNTvDpsvlJXclrGJq-vsCUYCI64ILu1y2mtvw,1056
  model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py,sha256=X6E6mewWQot_aAkz3UxW5X0-Fjl_aMMjs3A-Af5eL6w,972
  model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
@@ -557,16 +558,20 @@ tests_pytest/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/keras/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/keras/core/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/keras/core/test_data_util.py,sha256=XSoPu_ci1xy2EtK-3OWGpESr-Meg1GDaxuSvcj3yt-w,3915
+ tests_pytest/keras/gptq/__init__.py,sha256=pKAdbTCFM_2BrZXUtTIw0ouKotrWwUDF_hP3rPwCM2k,696
+ tests_pytest/keras/gptq/test_gradual_act_quantization.py,sha256=iwKaLI7QQ8H3qj6zmwwfd2ZOwRcCr8T-v_4llSh_chM,4804
+ tests_pytest/keras/trainable_infrastructure/__init__.py,sha256=DvaMXJtJZHAqOm96NdfBiNQsbN2sc9bG2kkyY-mpPh8,710
+ tests_pytest/keras/trainable_infrastructure/test_linear_annealing.py,sha256=dZjrMHVIiEVRNDYR3a4lZaXF2ElxFx32KAXXQvDz-v8,1793
  tests_pytest/pytorch/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/core/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/core/test_data_util.py,sha256=Bg3c21YVfXE1SAUlTao553gXcITTKF4CPeKtl3peBTE,5604
  tests_pytest/pytorch/gptq/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/gptq/test_annealing_cfg.py,sha256=hGC7L6mp3N1ygcJ3OctgS_Fz2JY75q5aswolJkbHkZM,2208
- tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=
+ tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=Dg2cg1X8u9Jxm7Y6tlZIGH81EPoW_vYorcdDExdj02w,4630
  tests_pytest/pytorch/trainable_infrastructure/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
- tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=
- mct_nightly-2.2.0.
- mct_nightly-2.2.0.
- mct_nightly-2.2.0.
- mct_nightly-2.2.0.
- mct_nightly-2.2.0.
+ tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=zErt9tOu7oupjpv08cvd1Cxvdk9qvP7GMUP6EhefK0c,1814
+ mct_nightly-2.2.0.20241027.532.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ mct_nightly-2.2.0.20241027.532.dist-info/METADATA,sha256=F0SYwH78ncR2_2Cx8EnUV0f_eydBxIdBjGnOd22oxqM,20830
+ mct_nightly-2.2.0.20241027.532.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ mct_nightly-2.2.0.20241027.532.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
+ mct_nightly-2.2.0.20241027.532.dist-info/RECORD,,
model_compression_toolkit/__init__.py
CHANGED
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
  from model_compression_toolkit import pruning
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
  
- __version__ = "2.2.0.
+ __version__ = "2.2.0.20241027.000532"
model_compression_toolkit/gptq/common/gptq_constants.py
CHANGED
@@ -22,4 +22,11 @@ SOFT_ROUNDING_ZETA = 1.1
  
  # GPTQ config constant
  QUANT_PARAM_LEARNING_STR = 'quantization_parameter_learning'
- MAX_LSB_STR = 'max_lsbs_change_map'
+ MAX_LSB_STR = 'max_lsbs_change_map'
+ 
+ # GPTQ learning hyperparameters
+ LR_DEFAULT = 3e-2
+ LR_REST_DEFAULT = 1e-4
+ LR_BIAS_DEFAULT = 1e-3
+ LR_QUANTIZATION_PARAM_DEFAULT = 1e-3
+ GPTQ_MOMENTUM = 0.9
model_compression_toolkit/gptq/{pytorch/quantizer → common}/gradual_activation_quantization.py
RENAMED
@@ -13,23 +13,23 @@
  # limitations under the License.
  # ==============================================================================
  from functools import partial
- from typing import Callable
+ from typing import Callable, Any
  
  from model_compression_toolkit.gptq import GradientPTQConfig, QFractionLinearAnnealingConfig
- from model_compression_toolkit.trainable_infrastructure import
- 
- from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import LinearAnnealingScheduler
+ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer
  
  
  def get_gradual_activation_quantizer_wrapper_factory(gptq_config: GradientPTQConfig,
-                                                      get_total_grad_steps_fn: Callable[[], int]
- 
+                                                      get_total_grad_steps_fn: Callable[[], int],
+                                                      fw_linear_annealing_scheduler: type) \
+         -> Callable[[Any], 'GradualActivationQuantizerWrapper']:
      """
      Get a factory for 'GradualActivationQuantizerWrapper'.
  
      Args:
          gptq_config: GPTQ configuration.
          get_total_grad_steps_fn: a callable to obtain the total expected number of gradient steps.
+         fw_linear_annealing_scheduler: LinearAnnealingScheduler implementation of the framework (tf/pytorch).
  
      Returns:
          A factory function to build 'GradualActivationQuantizerWrapper' from Quantizer.
@@ -40,9 +40,9 @@ def get_gradual_activation_quantizer_wrapper_factory(gptq_config: GradientPTQCon
      annealing_cfg = gptq_config.gradual_activation_quantization_config.q_fraction_scheduler_policy
      if isinstance(annealing_cfg, QFractionLinearAnnealingConfig):
          t_end = annealing_cfg.end_step or get_total_grad_steps_fn()
-         factor_scheduler =
- 
- 
+         factor_scheduler = fw_linear_annealing_scheduler(t_start=annealing_cfg.start_step, t_end=t_end,
+                                                          initial_val=annealing_cfg.initial_q_fraction,
+                                                          target_val=annealing_cfg.target_q_fraction)
      else:
          raise ValueError(f'Unknown annealing policy {annealing_cfg}')
  
@@ -64,7 +64,7 @@ class GradualActivationQuantizerWrapper:
          quantizer: quantizer to wrap.
          q_fraction_scheduler: a callable that accepts a gradient step and returns the corresponding quantized fraction.
      """
-     def __init__(self, quantizer:
+     def __init__(self, quantizer: BaseTrainableQuantizer, q_fraction_scheduler: Callable[[int], float]):
          self.quantizer = quantizer
          self.q_fraction_scheduler = q_fraction_scheduler
          self.step_cnt = 0
model_compression_toolkit/gptq/{pytorch/quantizer → common}/regularization_factory.py
RENAMED
@@ -12,17 +12,20 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Callable
  
- from
- from
- SoftQuantizerRegularization
- from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import LinearAnnealingScheduler
+ from tqdm import tqdm
+ from typing import Callable, Type
  
+ from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig
  
+ # Common warmup fraction
  WARMUP_STEP_FRACTION = 0.2
  
- 
+ 
+ def get_regularization(gptq_config: GradientPTQConfig,
+                        get_total_grad_steps_fn: Callable[[], int],
+                        SoftQuantizerRegularizationFWClass: Type,
+                        LinearAnnealingSchedulerFWClass: Type) -> Callable:
      """
      Returns a function that computes the regularization term for GPTQ training based on the given
      rounding type in the GPTQ configuration.
@@ -30,15 +33,26 @@ def get_regularization(gptq_config: GradientPTQConfig, get_total_grad_steps_fn:
      Args:
          gptq_config: A GPTQ configuration.
          get_total_grad_steps_fn: a callable to obtain the total expected number of gradient steps.
+         SoftQuantizerRegularizationFWClass: The class to use for soft quantizer regularization (framework-specific).
+         LinearAnnealingSchedulerFWClass: The class to use for the annealing scheduler (framework-specific).
  
-     Returns:
- 
+     Returns:
+         Callable: A function for computing the regularization. If there is no regularization function
+             defined for the given rounding type, then it returns a function that just returns 0.
      """
      if gptq_config.rounding_type == RoundingType.SoftQuantizer:
          total_gradient_steps = get_total_grad_steps_fn()
          t_start = int(WARMUP_STEP_FRACTION * total_gradient_steps)
- 
- 
+ 
+         # Directly initializing the scheduler within the method
+         scheduler = LinearAnnealingSchedulerFWClass(
+             t_start=t_start,
+             t_end=total_gradient_steps,
+             initial_val=20,
+             target_val=2
+         )
+ 
+         # Return the framework-specific soft quantizer regularization
+         return SoftQuantizerRegularizationFWClass(scheduler)
      else:
          return lambda *args, **kwargs: 0
model_compression_toolkit/gptq/keras/gptq_training.py
CHANGED
@@ -26,9 +26,14 @@ from model_compression_toolkit.core.common.user_info import UserInformation
  from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
  from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader
  from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
+ from model_compression_toolkit.gptq.common.gradual_activation_quantization import \
+     get_gradual_activation_quantizer_wrapper_factory
+ from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
  from model_compression_toolkit.gptq.keras.quantizer.quantization_builder import quantization_builder
  from model_compression_toolkit.logger import Logger
  from mct_quantizers import KerasActivationQuantizationHolder
+ from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
+ from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import KerasLinearAnnealingScheduler
  
  if version.parse(tf.__version__) >= version.parse("2.13"):
      from keras.src.engine.base_layer import TensorFlowOpLayer
@@ -41,13 +46,12 @@ from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
  from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
  from model_compression_toolkit.core.common import Graph
  from model_compression_toolkit.gptq.keras.graph_info import get_weights_for_loss, get_gptq_trainable_parameters
- from model_compression_toolkit.gptq.keras.quantizer.regularization_factory import get_regularization
  from model_compression_toolkit.core.common.framework_info import FrameworkInfo
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
  import numpy as np
  import copy
  from model_compression_toolkit.core.keras.constants import BIAS, USE_BIAS
- 
+ from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization
  
  class KerasGPTQTrainer(GPTQTrainer):
      """
@@ -78,6 +82,15 @@ class KerasGPTQTrainer(GPTQTrainer):
              hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores.
  
          """
+ 
+         def _get_total_grad_steps():
+             return get_total_grad_steps(representative_data_gen) * gptq_config.n_epochs
+ 
+         # This must be set before the model building (as it is required for activation holder construction),
+         # which occurs in the base constructor.
+         self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
+             gptq_config, _get_total_grad_steps, KerasLinearAnnealingScheduler)
+ 
          super().__init__(graph_float,
                           graph_quant,
                           gptq_config,
@@ -119,7 +132,10 @@ class KerasGPTQTrainer(GPTQTrainer):
  
          self.weights_for_average_loss = self._get_compare_points_loss_weights()
  
-         self.reg_func = get_regularization(self.gptq_config,
+         self.reg_func = get_regularization(self.gptq_config,
+                                            _get_total_grad_steps,
+                                            SoftQuantizerRegularization,
+                                            KerasLinearAnnealingScheduler)
  
      def _get_compare_points_loss_weights(self):
          """ Get compare points weights for the distillation loss. """
@@ -185,14 +201,13 @@ class KerasGPTQTrainer(GPTQTrainer):
          _, activation_quantizers = quantization_builder(n, self.gptq_config)  # TODO: split quantizers building into two functions: for weights and activations
  
          # Holder by definition uses a single quantizer for the activation quantization
-         # thus we make sure this is the only possible case
- 
- 
- 
- 
- 
- 
-         f"Ensure only one quantizer is configured for each node's activation.")
+         # thus we make sure this is the only possible case.
+         if len(activation_quantizers) != 1:
+             Logger.critical(f"'KerasActivationQuantizationHolder' is designed to support a single quantizer, "
+                             f"but {len(activation_quantizers)} quantizers were found for node '{n}'. "
+                             f"Ensure only one quantizer is configured for each node's activation.")
+         quantizer = self.gradual_act_quantizer_wrapper_factory(activation_quantizers[0])
+         return KerasActivationQuantizationHolder(quantizer)
  
      def build_gptq_model(self) -> Tuple[Model, UserInformation]:
          """
model_compression_toolkit/gptq/keras/quantization_facade.py
CHANGED
@@ -14,17 +14,18 @@
  # ==============================================================================
  import copy
  
- from typing import Callable, Tuple
+ from typing import Callable, Tuple, Union
  from packaging import version
  
- from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
  from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
- from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
+ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR_DEFAULT, LR_REST_DEFAULT, \
+     LR_BIAS_DEFAULT, GPTQ_MOMENTUM
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.constants import TENSORFLOW, ACT_HESSIAN_DEFAULT_BATCH_SIZE
  from model_compression_toolkit.verify_packages import FOUND_TF
  from model_compression_toolkit.core.common.user_info import UserInformation
- from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GPTQHessianScoresConfig
+ from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GPTQHessianScoresConfig, \
+     GradualActivationQuantizationConfig
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
  from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
  from model_compression_toolkit.core import CoreConfig
@@ -32,13 +33,8 @@ from model_compression_toolkit.core.runner import core_runner
  from model_compression_toolkit.gptq.runner import gptq_runner
  from model_compression_toolkit.core.analyzer import analyzer_model_quantization
  from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
- from model_compression_toolkit.metadata import
+ from model_compression_toolkit.metadata import create_model_metadata
  
- LR_DEFAULT = 0.15
- LR_REST_DEFAULT = 1e-4
- LR_BIAS_DEFAULT = 1e-4
- LR_QUANTIZATION_PARAM_DEFAULT = 1e-3
- GPTQ_MOMENTUM = 0.9
  
  if FOUND_TF:
      import tensorflow as tf
@@ -54,25 +50,25 @@ if FOUND_TF:
  
      # As from TF2.9 optimizers package is changed
      if version.parse(tf.__version__) < version.parse("2.9"):
-         from keras.optimizer_v2.optimizer_v2 import OptimizerV2
+         from keras.optimizer_v2.optimizer_v2 import OptimizerV2  # pragma: no cover
      elif version.parse(tf.__version__) < version.parse("2.12"):
-         from keras.optimizers.optimizer_v2.optimizer_v2 import OptimizerV2
+         from keras.optimizers.optimizer_v2.optimizer_v2 import OptimizerV2  # pragma: no cover
      else:
          from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2
  
      DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)
  
- 
      def get_keras_gptq_config(n_epochs: int,
-                               optimizer: OptimizerV2 =
-                               optimizer_rest: OptimizerV2 =
+                               optimizer: OptimizerV2 = None,
+                               optimizer_rest: OptimizerV2 = None,
                                loss: Callable = GPTQMultipleTensorsLoss(),
                                log_function: Callable = None,
                                use_hessian_based_weights: bool = True,
                                regularization_factor: float = REG_DEFAULT,
-                               hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
+                               hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE,
+                               gradual_activation_quantization: Union[bool, GradualActivationQuantizationConfig] = False) -> GradientPTQConfig:
          """
-         Create a
+         Create a GradientPTQConfig instance for Keras models.
  
          args:
              n_epochs (int): Number of epochs for running the representative dataset for fine-tuning.
@@ -83,9 +79,10 @@ if FOUND_TF:
              use_hessian_based_weights (bool): Whether to use Hessian-based weights for weighted average loss.
              regularization_factor (float): A floating point number that defines the regularization factor.
              hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
+             gradual_activation_quantization (bool, GradualActivationQuantizationConfig): If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.
  
          returns:
-             a
+             a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
  
          Examples:
  
@@ -94,7 +91,7 @@ if FOUND_TF:
          >>> import model_compression_toolkit as mct
          >>> import tensorflow as tf
  
-         Create a
+         Create a GradientPTQConfig to run for 5 epochs:
  
          >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5)
  
@@ -102,11 +99,24 @@ if FOUND_TF:
  
          >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=3, optimizer=tf.keras.optimizers.Nadam())
  
-         The configuration can be passed to :func:`~model_compression_toolkit.
+         The configuration can be passed to :func:`~model_compression_toolkit.keras_gradient_post_training_quantization` in order to quantize a keras model using gptq.
+ 
  
          """
+         optimizer = optimizer or tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT)
+         optimizer_rest = optimizer_rest or tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT)
+ 
          bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT,
                                                   momentum=GPTQ_MOMENTUM)
+ 
+         if isinstance(gradual_activation_quantization, bool):
+             gradual_quant_config = GradualActivationQuantizationConfig() if gradual_activation_quantization else None
+         elif isinstance(gradual_activation_quantization, GradualActivationQuantizationConfig):
+             gradual_quant_config = gradual_activation_quantization
+         else:
+             raise TypeError(f'gradual_activation_quantization argument should be bool or '
+                             f'GradualActivationQuantizationConfig, received {type(gradual_activation_quantization)}')
+ 
          return GradientPTQConfig(n_epochs,
                                   optimizer,
                                   optimizer_rest=optimizer_rest,
@@ -116,7 +126,8 @@ if FOUND_TF:
                                   optimizer_bias=bias_optimizer,
                                   use_hessian_based_weights=use_hessian_based_weights,
                                   regularization_factor=regularization_factor,
-                                  hessian_weights_config=GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size)
+                                  hessian_weights_config=GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size),
+                                  gradual_activation_quantization_config=gradual_quant_config)
  
  
      def keras_gradient_post_training_quantization(in_model: Model, representative_data_gen: Callable,
@@ -251,13 +262,13 @@ if FOUND_TF:
  else:
      # If tensorflow is not installed,
      # we raise an exception when trying to use these functions.
-     def get_keras_gptq_config(*args, **kwargs):
+     def get_keras_gptq_config(*args, **kwargs):  # pragma: no cover
          Logger.critical("Tensorflow must be installed with a version of 2.15 or lower to use "
                          "get_keras_gptq_config. The 'tensorflow' package is missing or is "
                          "installed with a version higher than 2.15.")  # pragma: no cover
  
-     def keras_gradient_post_training_quantization(*args, **kwargs):
+     def keras_gradient_post_training_quantization(*args, **kwargs):  # pragma: no cover
          Logger.critical("Tensorflow must be installed with a version of 2.15 or lower to use "
                          "keras_gradient_post_training_quantization. The 'tensorflow' package is missing or is "
-                         "installed with a version higher than 2.15.")
+                         "installed with a version higher than 2.15.")
model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py
CHANGED
@@ -16,18 +16,18 @@ from typing import Dict, List, Tuple
  
  from model_compression_toolkit.gptq import GradientPTQConfig
  from model_compression_toolkit.core import common
- from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
-     get_inferable_quantizer_kwargs
  from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
  from mct_quantizers import QuantizationTarget
- from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
  from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer
  
  from model_compression_toolkit.logger import Logger
+ from model_compression_toolkit.trainable_infrastructure import TrainingMethod
  from model_compression_toolkit.trainable_infrastructure.common.get_quantizer_config import \
-     get_trainable_quantizer_weights_config
+     get_trainable_quantizer_weights_config, get_trainable_quantizer_activation_config
  from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import \
      get_trainable_quantizer_class
+ from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers.base_activation_quantizer import \
+     BaseKerasActivationTrainableQuantizer
  
  
  def quantization_builder(n: common.BaseNode,
@@ -70,12 +70,13 @@ def quantization_builder(n: common.BaseNode,
  
      quant_method = n.final_activation_quantization_cfg.activation_quantization_method
  
-     quantizer_class =
+     quantizer_class = get_trainable_quantizer_class(quant_target=QuantizationTarget.Activation,
+                                                     quantizer_id=TrainingMethod.STE,
                                                      quant_method=quant_method,
-                                                     quantizer_base_class=
+                                                     quantizer_base_class=BaseKerasActivationTrainableQuantizer)
+     cfg = get_trainable_quantizer_activation_config(n, None)
  
- 
- 
-     activation_quantizers.append(quantizer_class(**kwargs))
+     # freeze_quant_params is True since in GPTQ the activation quantization parameters should not be trained.
+     activation_quantizers.append(quantizer_class(cfg, freeze_quant_params=True))
  
      return weights_quantizers, activation_quantizers
model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py
CHANGED
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import List
+ from typing import List, Callable
  
  import tensorflow as tf
  from keras import Model
@@ -22,61 +22,21 @@ from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribut
  from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
  
  
- class LinearTempDecay:
-     """
-     Annealing process for the soft quantizer regularization temperature term.
-     """
- 
-     def __init__(self, t_max: int, rel_start_decay: float = 0.2, start_b: int = 20, end_b: int = 2):
-         """
-         Initializes a LinearTempDecay object.
- 
-         Args:
-             t_max: maximal time step.
-             rel_start_decay: Decay step size at the beginning of the process.
-             start_b: Starting value of the regularization term.
-             end_b: Target value of the regularization term.
-         """
- 
-         self.t_max = t_max
-         self.start_decay = rel_start_decay * t_max
-         self.start_b = start_b
-         self.end_b = end_b
- 
-     def __call__(self, t: int) -> float:
-         """
-         Cosine annealing scheduler for soft quantizer regularization temperature term.
- 
-         Args:
-             t: The current time step.
- 
-         Returns: Scheduled temperature.
-         """
- 
-         is_before_start_decay = tf.cast(t < self.start_decay, tf.float32)
- 
-         rel_t = (t - self.start_decay) / (self.t_max - self.start_decay)
- 
-         return self.start_b * is_before_start_decay + \
-                (1 - is_before_start_decay) * \
-                (self.end_b + (self.start_b - self.end_b) * tf.math.maximum(0.0, (1 - rel_t)))
- 
  
  class SoftQuantizerRegularization:
      """
      A class to handle the computation of soft quantizer regularization for GPTQ training.
      """
  
-     def __init__(self,
+     def __init__(self, beta_scheduler: Callable[[int], float]):
          """
          Initializes the regularization computation object with a LinearDecay object.
  
          Args:
- 
+             beta_scheduler: a callable that accepts current time step and returns a corresponding beta value.
          """
          # Initializing the temperature decay according to the number of expected gradient steps
-         self.
- 
+         self.beta_scheduler = beta_scheduler
          self.count_iter = tf.Variable(0.)
  
  
@@ -91,7 +51,7 @@ class SoftQuantizerRegularization:
          Returns: Regularization value.
          """
          soft_reg_aux: List[tf.Tensor] = []
-         b = self.
+         b = self.beta_scheduler(self.count_iter.value())
          for layer in model.layers:
              if isinstance(layer, KerasTrainableQuantizationWrapper):
                  kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
model_compression_toolkit/gptq/pytorch/gptq_training.py
CHANGED
@@ -17,15 +17,18 @@ from typing import Callable, List, Tuple, Union, Generator
  
  import numpy as np
  import torch
- from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
  from torch.nn import Module
  from torch.utils.data import DataLoader
  from tqdm import tqdm
  
+ from model_compression_toolkit.gptq.common.gradual_activation_quantization import get_gradual_activation_quantizer_wrapper_factory
+ from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
+ 
  from model_compression_toolkit.core.common import Graph, BaseNode
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
  from model_compression_toolkit.core.common.framework_info import FrameworkInfo
  from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresGranularity
+ 
  from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
  from model_compression_toolkit.core.pytorch.constants import BIAS
  from model_compression_toolkit.core.pytorch.data_util import FixedDatasetFromGenerator, IterableDatasetFromGenerator, \
@@ -34,14 +37,15 @@ from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, set_mo
  from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
  from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
  from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
- from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable_parameters,
-     get_weights_for_loss
- from model_compression_toolkit.gptq.pytorch.quantizer.gradual_activation_quantization import \
-     get_gradual_activation_quantizer_wrapper_factory
+ from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable_parameters, get_weights_for_loss
  from model_compression_toolkit.gptq.pytorch.quantizer.quantization_builder import quantization_builder
- 
+ 
+ from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
+ from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
+ from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
+ from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization as PytorchSoftQuantizerRegularization
+ 
  from model_compression_toolkit.logger import Logger
- from model_compression_toolkit.trainable_infrastructure.pytorch.util import get_total_grad_steps
  
  
  class PytorchGPTQTrainer(GPTQTrainer):
@@ -78,7 +82,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
  
          # must be set prior to model building in the base class constructor
          self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
-             gptq_config, _get_total_grad_steps)
+             gptq_config, _get_total_grad_steps, PytorchLinearAnnealingScheduler)
  
          super().__init__(graph_float,
                           graph_quant,
@@ -121,7 +125,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
          else:
              self.train_dataloader = self._prepare_train_dataloader_for_non_sla(representative_data_gen)
  
-         self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps)
+         self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps, PytorchSoftQuantizerRegularization, PytorchLinearAnnealingScheduler)
  
      def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader:
          """
model_compression_toolkit/gptq/pytorch/quantization_facade.py
CHANGED
@@ -26,18 +26,15 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
  from model_compression_toolkit.core.runner import core_runner
  from model_compression_toolkit.gptq.common.gptq_config import (
      GradientPTQConfig, GPTQHessianScoresConfig, GradualActivationQuantizationConfig)
- from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
- 
+ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR_DEFAULT, LR_REST_DEFAULT, \
+     LR_BIAS_DEFAULT, GPTQ_MOMENTUM
  from model_compression_toolkit.gptq.runner import gptq_runner
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.metadata import create_model_metadata
  from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
  from model_compression_toolkit.verify_packages import FOUND_TORCH
  
- 
- LR_REST_DEFAULT = 1e-4
- LR_BIAS_DEFAULT = 1e-4
- LR_QUANTIZATION_PARAM_DEFAULT = 1e-4
+ 
  
  if FOUND_TORCH:
      from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
@@ -76,10 +73,6 @@ if FOUND_TORCH:
              regularization_factor (float): A floating point number that defines the regularization factor.
              hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
              use_hessian_sample_attention (bool): whether to use Sample-Layer Attention score for weighted loss.
-             gradual_activation_quantization (bool, GradualActivationQuantizationConfig):
-                 If False, GradualActivationQuantization is disabled.
-                 If True, GradualActivationQuantization is enabled with the default settings.
-                 GradualActivationQuantizationConfig object can be passed to use non-default settings.
  
          returns:
              a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
@@ -96,9 +89,6 @@ if FOUND_TORCH:
          >>> import torch
          >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))
  
-         To enable Gradual Activation Quantization with non-default settings build GradualActivationQuantizationConfig:
-         >>> gradual_act_conf = mct.gptq.GradualActivationQuantizationConfig(mct.gptq.QFractionLinearAnnealingConfig(initial_q_fraction=0.2))
-         >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, gradual_activation_quantization=gradual_act_conf)
          The configuration can be passed to :func:`~model_compression_toolkit.pytorch_gradient_post_training_quantization` in order to quantize a pytorch model using gptq.
  
          """
model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py
CHANGED
@@ -18,7 +18,6 @@ from typing import Union
  import numpy as np
  import tensorflow as tf
  from tensorflow.python.framework.tensor_shape import TensorShape
- from model_compression_toolkit.constants import SIGNED
  
  from model_compression_toolkit.trainable_infrastructure import TrainingMethod
  
@@ -29,10 +28,9 @@ from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
  from model_compression_toolkit import constants as C
  
  from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import BaseKerasQATWeightTrainableQuantizer
- from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
- from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
+ from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
+ from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
  from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
- from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale
  from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import symmetric_lsq_quantizer
  
  
model_compression_toolkit/trainable_infrastructure/common/annealing_schedulers.py
ADDED
@@ -0,0 +1,68 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ from abc import abstractmethod
+ 
+ 
+ class BaseLinearAnnealingScheduler:
+     def __init__(self, t_start: int, t_end: int, initial_val: float, target_val: float):
+         """
+         Base class for Linear annealing scheduler. Returns the corresponding annealed value per time step.
+ 
+         Args:
+             t_start: Time step to begin annealing.
+             t_end: Time step to complete annealing.
+             initial_val: Initial value before annealing.
+             target_val: Target value after annealing.
+ 
+         Raises:
+             ValueError: If t_start is not in the range [0, t_end).
+         """
+         if not (0 <= t_start < t_end):
+             raise ValueError(f'Expected 0 <= t_start < t_end, actual {t_end=} {t_start=}')
+ 
+         self.t_start = t_start
+         self.t_end = t_end
+         self.initial_val = initial_val
+         self.target_val = target_val
+ 
+     @abstractmethod
+     def _compute_factor(self, t: int) -> float:
+         """
+         Abstract method to compute the annealing factor based on time step `t`.
+ 
+         Args:
+             t: Current time step.
+ 
+         Returns:
+             float: Annealing factor, typically in the range [0, 1].
+ 
+         Raises:
+             NotImplementedError: If this method is not overridden in the subclass.
+         """
+         raise NotImplementedError("This method should be overridden in subclasses")
+ 
+     def __call__(self, t: int) -> float:
+         """
+         Calculates the annealed value based on the current time step `t`.
+ 
+         Args:
+             t: Current time step.
+ 
+         Returns:
+             float: Annealed value between initial_val and target_val.
+         """
+         factor = self._compute_factor(t)
+         return self.initial_val + factor * (self.target_val - self.initial_val)
+ 
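To make the interpolation concrete, a small worked example (a sketch, not part of the diff) using the Keras subclass added in the next file:

>>> s = KerasLinearAnnealingScheduler(t_start=10, t_end=50, initial_val=20, target_val=2)
>>> s(0)   # factor clipped to 0 -> 20
>>> s(30)  # factor (30-10)/(50-10) = 0.5 -> 20 + 0.5 * (2 - 20) = 11
>>> s(50)  # factor clipped to 1 -> 2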
model_compression_toolkit/trainable_infrastructure/keras/annealing_schedulers.py
ADDED
@@ -0,0 +1,32 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ import tensorflow as tf
+ 
+ from model_compression_toolkit.trainable_infrastructure.common.annealing_schedulers import BaseLinearAnnealingScheduler
+ 
+ 
+ class KerasLinearAnnealingScheduler(BaseLinearAnnealingScheduler):
+     def _compute_factor(self, t: int) -> float:
+         """
+         Computes the annealing factor for Keras models.
+ 
+         Args:
+             t: Current time step.
+ 
+         Returns:
+             float: Clipped annealing factor between 0 and 1.
+         """
+         factor = (t - self.t_start) / (self.t_end - self.t_start)
+         return tf.clip_by_value(factor, 0, 1)
model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py
CHANGED
@@ -13,27 +13,19 @@
  # limitations under the License.
  # ==============================================================================
  from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
+ from model_compression_toolkit.trainable_infrastructure.common.annealing_schedulers import BaseLinearAnnealingScheduler
  
  
- class
-     def
+ class PytorchLinearAnnealingScheduler(BaseLinearAnnealingScheduler):
+     def _compute_factor(self, t: int) -> float:
          """
- 
+         Computes the annealing factor for torch models.
  
          Args:
- 
-             t_end: time step to complete annealing.
-             initial_val: initial value.
-             target_val: target value.
-         """
-         if not (0 <= t_start < t_end):
-             raise ValueError(f'Expected 0 <= t_start < t_end, actual {t_end=} {t_start=}')
- 
-         self.t_start = t_start
-         self.t_end = t_end
-         self.initial_val = initial_val
-         self.target_val = target_val
+             t: Current time step.
  
- 
- 
- 
+         Returns:
+             float: Clipped annealing factor between 0 and 1.
+         """
+         factor = to_torch_tensor((t - self.t_start) / (self.t_end - self.t_start))
+         return factor.clip(0, 1)
tests_pytest/keras/gptq/__init__.py
ADDED
@@ -0,0 +1,14 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
@@ -0,0 +1,102 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from unittest.mock import Mock
+import pytest
+import numpy as np
+import tensorflow as tf
+
+from model_compression_toolkit.gptq.common.gradual_activation_quantization import GradualActivationQuantizerWrapper, \
+    get_gradual_activation_quantizer_wrapper_factory
+from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import KerasLinearAnnealingScheduler
+from model_compression_toolkit.gptq import GradientPTQConfig, GradualActivationQuantizationConfig, QFractionLinearAnnealingConfig
+
+
+
+@pytest.fixture
+def x():
+    return tf.random.normal((2, 5, 6, 7), seed=42, dtype=tf.float32)
+
+
+class Quantizer:
+    def __call__(self, x, training):
+        self.training = training
+        return 3 * x + 1
+
+
+class TestGradualActivationQuantization:
+
+    def test_gradual_act_quant_wrapper(self, x):
+        quantizer = Quantizer()
+        qw = GradualActivationQuantizerWrapper(quantizer, q_fraction_scheduler=lambda t: t / (t + 1))
+
+        y0, y1, y2 = [qw(x, training=True) for _ in range(3)]
+        assert np.allclose(y0.numpy(), x.numpy())  # t=0
+        assert np.allclose(y1.numpy(), 0.5 * x.numpy() + (1.5 * x.numpy() + 0.5))  # t=1
+        assert np.allclose(y2.numpy(), x.numpy() / 3 + (2 * x.numpy() + 2 / 3))  # t=2
+        assert quantizer.training is True
+
+        _ = qw(x, training=False)
+        assert quantizer.training is False  # correct flag was propagated
+
+    def test_factory_no_qdrop(self):
+        quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg=None, get_grad_steps_fn=None)
+        assert quantizer_wrapper is quantizer
+
+    @pytest.mark.parametrize('end_step', (20, None))
+    def test_factory_linear(self, x, end_step):
+        qdrop_cfg = GradualActivationQuantizationConfig(
+            QFractionLinearAnnealingConfig(initial_q_fraction=0.3, target_q_fraction=0.8, start_step=10,
+                                           end_step=end_step)
+        )
+
+        def get_total_steps():
+            if end_step is None:
+                return 50
+            assert False  # should not be called if end_step is passed
+
+        quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg, get_total_steps)
+
+        scheduler = quantizer_wrapper.q_fraction_scheduler
+        assert isinstance(scheduler, KerasLinearAnnealingScheduler)
+        exp_end_step = 50 if end_step is None else end_step
+        assert scheduler.t_start == 10
+        assert scheduler.t_end == exp_end_step
+        assert scheduler.initial_val == 0.3
+        assert scheduler.target_val == 0.8
+
+        y = [quantizer_wrapper(x, training=True) for _ in range(exp_end_step + 1)]
+
+        assert np.allclose(y[9].numpy(), 0.7 * x.numpy() + 0.3 * quantizer(x, training=True).numpy())
+        assert np.allclose(y[10].numpy(), 0.7 * x.numpy() + 0.3 * quantizer(x, training=True).numpy())
+        assert np.allclose(y[-1].numpy(), 0.2 * x.numpy() + 0.8 * quantizer(x, training=True).numpy())
+
+    def test_factory_linear_common_case(self, x):
+        # validate that we actually implemented the right thing - on first call float input, on last call fully quantized
+        qdrop_cfg = GradualActivationQuantizationConfig(
+            QFractionLinearAnnealingConfig(initial_q_fraction=0, target_q_fraction=1, start_step=0, end_step=None)
+        )
+        quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg, lambda: 15)
+        y0, *_, y_last = [quantizer_wrapper(x, training=True) for _ in range(16)]
+        assert np.array_equal(y0.numpy(), x.numpy())
+        assert np.allclose(y_last.numpy(), quantizer(x, training=True).numpy())
+
+    def _run_factory_test(self, qdrop_cfg, get_grad_steps_fn):
+        # Mocks are used to just pass anything
+        gptq_cfg = GradientPTQConfig(n_epochs=5, optimizer=Mock(), loss=Mock(),
+                                     gradual_activation_quantization_config=qdrop_cfg)
+        factory = get_gradual_activation_quantizer_wrapper_factory(gptq_cfg, get_grad_steps_fn, KerasLinearAnnealingScheduler)
+        quantizer = Quantizer()
+        quantizer_wrapper = factory(quantizer)
+        return quantizer_wrapper, quantizer
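Illustrative note (not part of the diff): the assertions above are consistent with the wrapper producing a convex mix of the float input and the quantized output, with the mixing fraction taken from the scheduler at the current step, i.e. y_t = (1 - q_fraction(t)) * x + q_fraction(t) * quantizer(x). A minimal numeric sketch with hypothetical values, independent of the package:

    def mix(x, q_x, q_fraction):
        # convex combination used conceptually by the gradual activation quantization wrapper
        return (1 - q_fraction) * x + q_fraction * q_x

    x = 2.0
    q_x = 3 * x + 1          # the toy Quantizer from the test above
    print(mix(x, q_x, 0.0))  # 2.0 - pure float input
    print(mix(x, q_x, 0.5))  # 4.5 - halfway
    print(mix(x, q_x, 1.0))  # 7.0 - fully quantized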
@@ -0,0 +1,16 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+
@@ -0,0 +1,49 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+import pytest
+
+from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import KerasLinearAnnealingScheduler
+
+
+def test_linear_annealing():
+    scheduler = KerasLinearAnnealingScheduler(t_start=10, t_end=35, initial_val=3.4, target_val=-1.6)
+    for t in [0, 9, 10]:
+        assert _isclose(scheduler(t), 3.4)
+
+    for t in [35, 36, 1000]:
+        assert _isclose(scheduler(t), -1.6)
+
+    assert _isclose(scheduler(11), 3.2)
+    assert _isclose(scheduler(27), 0.)
+    assert _isclose(scheduler(34), -1.4)
+
+
+def test_linear_annealing_ascending():
+    scheduler = KerasLinearAnnealingScheduler(t_start=0, t_end=5, initial_val=-0.5, target_val=1.5)
+    assert _isclose(scheduler(0), -0.5)
+    assert _isclose(scheduler(1), -0.1)
+    assert _isclose(scheduler(4), 1.1)
+    assert _isclose(scheduler(5), 1.5)
+
+
+@pytest.mark.parametrize('start', [5, -1])
+def test_invalid(start):
+    with pytest.raises(ValueError):
+        KerasLinearAnnealingScheduler(t_start=start, t_end=4, initial_val=1, target_val=0)
+
+
+def _isclose(x, y):
+    return np.isclose(x, y)
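Illustrative note (not part of the diff): the expected values in test_linear_annealing follow directly from the linear interpolation; a quick check in plain Python, no framework needed:

    t_start, t_end, initial_val, target_val = 10, 35, 3.4, -1.6
    for t in (11, 27, 34):
        factor = min(max((t - t_start) / (t_end - t_start), 0), 1)
        print(t, initial_val + factor * (target_val - initial_val))
    # 11 -> 3.2, 27 -> 0.0, 34 -> -1.4 (up to float rounding)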
@@ -18,9 +18,9 @@ import pytest
 import torch
 
 from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device
-from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import
+from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
 from model_compression_toolkit.gptq import GradientPTQConfig, GradualActivationQuantizationConfig, QFractionLinearAnnealingConfig
-from model_compression_toolkit.gptq.
+from model_compression_toolkit.gptq.common.gradual_activation_quantization import (
     GradualActivationQuantizerWrapper, get_gradual_activation_quantizer_wrapper_factory)
 
 
@@ -68,7 +68,7 @@ class TestGradualActivationQuantization:
         quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg, get_total_steps)
 
         scheduler = quantizer_wrapper.q_fraction_scheduler
-        assert isinstance(scheduler,
+        assert isinstance(scheduler, PytorchLinearAnnealingScheduler)
         exp_end_step = 50 if end_step is None else end_step
         assert scheduler.t_start == 10
         assert scheduler.t_end == exp_end_step
@@ -94,7 +94,7 @@ class TestGradualActivationQuantization:
         # Mocks are used to just pass anything
         gptq_cfg = GradientPTQConfig(n_epochs=5, optimizer=Mock(), loss=Mock(),
                                      gradual_activation_quantization_config=qdrop_cfg)
-        factory = get_gradual_activation_quantizer_wrapper_factory(gptq_cfg, get_grad_steps_fn)
+        factory = get_gradual_activation_quantizer_wrapper_factory(gptq_cfg, get_grad_steps_fn, PytorchLinearAnnealingScheduler)
         quantizer = Quantizer()
         quantizer_wrapper = factory(quantizer)
         return quantizer_wrapper, quantizer
@@ -15,11 +15,11 @@
 import torch
 import pytest
 
-from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import
+from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
 
 
 def test_linear_annealing():
-    scheduler =
+    scheduler = PytorchLinearAnnealingScheduler(t_start=10, t_end=35, initial_val=3.4, target_val=-1.6)
     for t in [0, 9, 10]:
         assert _isclose(scheduler(t), 3.4)
 
@@ -32,7 +32,7 @@ def test_linear_annealing():
 
 
 def test_linear_annealing_ascending():
-    scheduler =
+    scheduler = PytorchLinearAnnealingScheduler(t_start=0, t_end=5, initial_val=-0.5, target_val=1.5)
     assert _isclose(scheduler(0), -0.5)
     assert _isclose(scheduler(1), -0.1)
     assert _isclose(scheduler(4), 1.1)
@@ -42,7 +42,7 @@ def test_linear_annealing_ascending():
 @pytest.mark.parametrize('start', [5, -1])
 def test_invalid(start):
     with pytest.raises(ValueError):
-
+        PytorchLinearAnnealingScheduler(t_start=start, t_end=4, initial_val=1, target_val=0)
 
 
 def _isclose(x, y):
@@ -1,44 +0,0 @@
-# Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from tqdm import tqdm
-from typing import Callable
-
-from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig, GradientPTQConfig
-from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import \
-    SoftQuantizerRegularization
-
-
-def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen: Callable) -> Callable:
-    """
-    Returns a function that computes the regularization term for GPTQ training based on the given
-    rounding type in the GPTQ configuration.
-
-    Args:
-        gptq_config: A GPTQ configuration.
-        representative_data_gen: Dataset used for the GPTQ training.
-
-    Returns: A function for computing the regularization. If there is no regularization function defined for the given
-        rounding type, then it returns a function that just returns 0.
-
-    """
-    if gptq_config.rounding_type == RoundingType.SoftQuantizer:
-        # dry run on the representative dataset to count number of batches
-        num_batches = 0
-        for _ in tqdm(representative_data_gen(), "GPTQ initialization"):
-            num_batches += 1
-
-        return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
-    else:
-        return lambda m, e_reg: 0
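Illustrative note (not part of the diff): the removed factory derived the total number of gradient steps by exhausting the representative dataset once and multiplying by the number of epochs. A minimal sketch of that counting pattern, with a hypothetical data generator independent of the package:

    def count_batches(representative_data_gen):
        # dry run: iterate the generator once just to count batches
        return sum(1 for _ in representative_data_gen())

    def data_gen():
        for _ in range(8):  # 8 batches per pass, as an example
            yield [[0.0]]

    n_epochs = 5
    total_gradient_steps = count_batches(data_gen) * n_epochs
    print(total_gradient_steps)  # 40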
{mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/LICENSE.md
RENAMED
File without changes

{mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/top_level.txt
RENAMED
File without changes