mct-nightly 2.2.0.20241025.505__py3-none-any.whl → 2.2.0.20241027.532__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/RECORD +26 -21
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/gptq/common/gptq_constants.py +8 -1
  5. model_compression_toolkit/gptq/{pytorch/quantizer → common}/gradual_activation_quantization.py +10 -10
  6. model_compression_toolkit/gptq/{pytorch/quantizer → common}/regularization_factory.py +25 -11
  7. model_compression_toolkit/gptq/keras/gptq_training.py +26 -11
  8. model_compression_toolkit/gptq/keras/quantization_facade.py +35 -24
  9. model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +10 -9
  10. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +5 -45
  11. model_compression_toolkit/gptq/pytorch/gptq_training.py +13 -9
  12. model_compression_toolkit/gptq/pytorch/quantization_facade.py +3 -13
  13. model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +2 -4
  14. model_compression_toolkit/trainable_infrastructure/common/annealing_schedulers.py +68 -0
  15. model_compression_toolkit/trainable_infrastructure/keras/annealing_schedulers.py +32 -0
  16. model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py +10 -18
  17. tests_pytest/keras/gptq/__init__.py +14 -0
  18. tests_pytest/keras/gptq/test_gradual_act_quantization.py +102 -0
  19. tests_pytest/keras/trainable_infrastructure/__init__.py +16 -0
  20. tests_pytest/keras/trainable_infrastructure/test_linear_annealing.py +49 -0
  21. tests_pytest/pytorch/gptq/test_gradual_act_quantization.py +4 -4
  22. tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py +4 -4
  23. model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +0 -44
  24. {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/LICENSE.md +0 -0
  25. {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/WHEEL +0 -0
  26. {mct_nightly-2.2.0.20241025.505.dist-info → mct_nightly-2.2.0.20241027.532.dist-info}/top_level.txt +0 -0
  27. /model_compression_toolkit/trainable_infrastructure/{pytorch → common}/util.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mct-nightly
- Version: 2.2.0.20241025.505
+ Version: 2.2.0.20241027.532
  Summary: A Model Compression Toolkit for neural networks
  Home-page: UNKNOWN
  License: UNKNOWN
@@ -1,4 +1,4 @@
- model_compression_toolkit/__init__.py,sha256=V06niKRfU-gteePjgcRX-Y6QEsEyAyHzzvzwawNEU3U,1573
+ model_compression_toolkit/__init__.py,sha256=e7Hzdcg92ykZG1qmS--aAUq-lsyc1Xp_WN__EKEbiDk,1573
  model_compression_toolkit/constants.py,sha256=i4wYheBkIdQmsQA-axIpcT3YiSO1USNc-jaNiNE8w6E,3920
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
  model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -347,23 +347,24 @@ model_compression_toolkit/gptq/__init__.py,sha256=pEgkJvmf05KSw70iLDTz_6LI_2Oi5L
  model_compression_toolkit/gptq/runner.py,sha256=La12JTYjWyJW0YW4Al4TP1_Xi4JWBCEKw6FR_JQsxe0,5982
  model_compression_toolkit/gptq/common/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/common/gptq_config.py,sha256=Z6T5B3q4k2Tlr2bBWvC6TAF3d2opyA7ZT_D_mz6D1_0,6297
- model_compression_toolkit/gptq/common/gptq_constants.py,sha256=QSm6laLkIV0LYmU0BLtmKp3Fi3SqDfbncFQWOGA1cGU,611
+ model_compression_toolkit/gptq/common/gptq_constants.py,sha256=D1x2n4-NdAx6g_1Wc2hwwh4vX9vmx5VnQWN26H107kg,766
  model_compression_toolkit/gptq/common/gptq_framework_implementation.py,sha256=n3mSf4J92kFjekzyGyrJULylI-8Jf5OVWJ5AFoVnEx0,1266
  model_compression_toolkit/gptq/common/gptq_graph.py,sha256=-bL5HhPcKqV8nj4dZPXc5QmQJbFBel6etrioikP0tEo,3039
  model_compression_toolkit/gptq/common/gptq_training.py,sha256=tt4O8PjSChquzl4c6NojvQWZmvCdTxcMLtmEVIGx1ns,13252
+ model_compression_toolkit/gptq/common/gradual_activation_quantization.py,sha256=EgpzMs_aDoB0wQiTagqvcxCTfrgNUuCfdXEXmfNiyb0,3780
+ model_compression_toolkit/gptq/common/regularization_factory.py,sha256=hyunpXepVeHyoAFJw6zNLK-3ZHBmiut3lmNisJN_L3E,2514
  model_compression_toolkit/gptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCSjq5xk-xGymOwSOqjp39It-CVtGcCTRTf0E_4,1248
  model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=rbRkF15MYd6nq4G49kcjb_dPTa-XNq9cTkrb93mXawo,6241
- model_compression_toolkit/gptq/keras/gptq_training.py,sha256=tFHucF7YHKtHmYGkdMpqSf14H9c7x60Il7ZTMNXSesE,19751
+ model_compression_toolkit/gptq/keras/gptq_training.py,sha256=TEWqAU8JZnZVZ-dIkINA0x1NmSrYpEkXTdG835JdKnI,20848
  model_compression_toolkit/gptq/keras/graph_info.py,sha256=MKIfrRTRH3zCuxCR1g9ZVIFyuSSr0e0sDybqh4LDM7E,4672
- model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=iSHnMEdoIqHYqLCTsdK8uxhKbZuuaDOu_BeQ10Z492U,15715
+ model_compression_toolkit/gptq/keras/quantization_facade.py,sha256=DhEEpW0rK4JRdk5WQlN-_DOUuzlwOBqpiwTBOySjn2g,16820
  model_compression_toolkit/gptq/keras/quantizer/__init__.py,sha256=-DK1CDXvlsnEbki4lukZLpl6Xrbo91_jcqxXlG5Eg6Q,963
  model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py,sha256=Rbl9urzkmACvVxICSEyJ02qFOBxWK0UQWtysFJzBVZw,4899
  model_compression_toolkit/gptq/keras/quantizer/quant_utils.py,sha256=Vt7Qb8i4JsE4sFtcjpfM4FTXTtfV1t6SwfoNH8a_Iaw,5055
- model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py,sha256=FmK5cPwgLAzrDjHTWf_vbRO5s70S7iwpnjnlqEQTuGE,4408
- model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py,sha256=guf7ygnLsZeWnTDz4yJdE2iTkd1oE0uQAZwKnGV3OAk,1957
+ model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py,sha256=rst-u5EB9Xss4ndKqi297WvZ-9RVee2TAUVFelPVKhU,4663
  model_compression_toolkit/gptq/keras/quantizer/soft_rounding/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=qUuMKysUpjWYjNbchFuyb_UFwzV1HL7R3Y7o0Z5rf60,4016
+ model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=REO-pIXpT4ZuJzhizvQjz6vn7Vxnq7k0KvikuQ4FDkE,2769
  model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=BBSDWLmeywjSM5N6oJkMgcuo7zrXTesB4zLwRGG8QB0,12159
  model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py,sha256=pyhlVpoauHM-zuixHsIGPHFgQoXppL8TlDFCjPE2RuY,10377
  model_compression_toolkit/gptq/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
@@ -371,15 +372,13 @@ model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py,sha
  model_compression_toolkit/gptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=_07Zx_43bnNokwR5S8phIqeu5-_7_5VBT4DT-FCw7Do,3892
  model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
- model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=QBxTnwVvLyZDTdpkR81wjj9o5aGtmp9qiBt5FR8ImJ0,21777
+ model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=2KwJFlJj6hFJClsJbC9aaWDAGbZUNDbSx1d-QX4LShc,22132
  model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=4mVM-VvnBaA64ACVdOe6wTGHdMSa2UTLIUe7nACLcdo,4008
- model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=yv2DWPWpFVRmtB_FhcRwnLUumyPPHC_hHaMxeQBTQ1k,16333
+ model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=lY7_lNtS1SqaaJ0gc6C7_HO71bBalsxQY37QQlWpu70,15479
  model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
  model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=fKg-PNOhGBiL-4eySS9Fyw0GkA76Pq8jT_HbJuJ8iZU,4143
- model_compression_toolkit/gptq/pytorch/quantizer/gradual_activation_quantization.py,sha256=nngu2TeXjngkqt_6-wciFmCvo-dbpeh_tJJxBV_cfHk,3686
  model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
  model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=5EyAzvlU01vLyXmMwY_8dNyb7GwYktXmnrvUON8n8WI,4696
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256=H6pARLK-jq3cKoaipY0SK9wMGrqy6CSEZTk14KdrKA0,2105
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=f7B95Bx-MX-HKheqAUn1GG8cVHFI2ldFReXrUPwk2tY,3002
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=kLVQC1hXzDpP4Jx7AwnA764oGnY5AMEuvUUhAvhz09M,12347
@@ -407,7 +406,7 @@ model_compression_toolkit/qat/keras/quantizer/base_keras_qat_weight_quantizer.py
  model_compression_toolkit/qat/keras/quantizer/quant_utils.py,sha256=cBULOgWUodcBO1lHevZggdTevuDYI6tQceV86U2x6DA,2543
  model_compression_toolkit/qat/keras/quantizer/quantization_builder.py,sha256=hGizGBbOGZpD-w3wg-LlehUYJDWLk91VUdfVwwG2Z78,5882
  model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
- model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=dtkS0mpjvJntAxpOi-BJx-pCeBF2ReKKeH7y2uwzpH0,6756
+ model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py,sha256=ujxb7hYHj25QRwu7SScP8BZXWQdh61knzk68KRIdOp0,6501
  model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py,sha256=vGUs9b0IHTydCA5tN7iekuhf1LHNgIrSF5sXMD1WsSI,6476
  model_compression_toolkit/qat/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=pFkrjtlavCniswcO3-Djlh6a_Hz1rrcEa7Z5wTGVRCU,8270
@@ -493,6 +492,7 @@ model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/
  model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py,sha256=YVJJvqGPBdkKnug99p9bjqtbfecDXZKIB2iWVCe7RUY,5960
  model_compression_toolkit/trainable_infrastructure/__init__.py,sha256=uewpvlPkH9mBFt8IxoAgIfz6iEcvWbOImm_fb6_BxD8,1543
  model_compression_toolkit/trainable_infrastructure/common/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
+ model_compression_toolkit/trainable_infrastructure/common/annealing_schedulers.py,sha256=qm2_wa61nga08Jdcl3RkgTsJ0zyHNjZ_A6I2--oVOig,2455
  model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py,sha256=IF50ASBUvVrOVqlJ1nHNxZxKXSuCanjhUX0YjMB-rRg,7946
  model_compression_toolkit/trainable_infrastructure/common/constants.py,sha256=HN120boJxAnEXNrLSj-o_s-VX4o6C-1ap_KZ4840sd0,875
  model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py,sha256=Jxd4IjS_t0FwnA_S_WmZeVbh4VM6Da9ahKGPLp6ZhQo,6983
@@ -500,7 +500,9 @@ model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py,sha2
  model_compression_toolkit/trainable_infrastructure/common/quant_utils.py,sha256=zdiew1jwR7tUKm9XWlHnAPxIZsAdKqbzzC2vH02j5wA,1505
  model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py,sha256=My5Wz34jPOyh8z33OTpKnOobRB0cpO_Qgmtsd5lizHo,4791
  model_compression_toolkit/trainable_infrastructure/common/training_method.py,sha256=LUoeJkloowhZKuHTiOfzjmSUn2G-4of11-rbnL-h0P4,1194
+ model_compression_toolkit/trainable_infrastructure/common/util.py,sha256=oKuWi7E07a8zv5x9auhBugYE2RUQ7ojDh2XCs5koYJY,1090
  model_compression_toolkit/trainable_infrastructure/keras/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
+ model_compression_toolkit/trainable_infrastructure/keras/annealing_schedulers.py,sha256=sISNVxPsdm-Nd95PhoPSJ-2tFpINGlfrU7ZXaCByI-o,1278
  model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py,sha256=LBc26z8pkpbcdKMTxpNBg5IyChLreHQ1lRgCVjNE37o,4202
  model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py,sha256=txdWXdZoHazg-3MDPb9P-oXRM92LRn2G_8woEplwKaI,4360
  model_compression_toolkit/trainable_infrastructure/keras/load_model.py,sha256=DJHibcLo-UCuHV6UPLeVd7dKmPfkGXEiLqCCqvQrISM,3769
@@ -515,10 +517,9 @@ model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/s
  model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/symmetric_ste.py,sha256=THY5eZ_69D1yzkXLhLg84ON_deNUAD_qMJ6A5C5znDM,7359
  model_compression_toolkit/trainable_infrastructure/keras/activation_quantizers/ste/uniform_ste.py,sha256=XEypYorBnSBLj6sh1pHCNaSjeCToYVlERWIHxUoXvuc,5733
  model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py,sha256=huHoBUcKNB6BnY6YaUCcFvdyBtBI172ZoUD8ZYeNc6o,696
- model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py,sha256=IdUBpZUcOXHLPp2OhwbO_Kytee3OTVuy2032N-tm694,1686
+ model_compression_toolkit/trainable_infrastructure/pytorch/annealing_schedulers.py,sha256=W5NPQiwIAd2dpaoU9WfRwSt0ljrrePj5lwPk6d1yVwQ,1333
  model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py,sha256=lWc5EG3ptrP85n69EHGKFkIadnrKEBMKnB5YXQ5AmXo,2745
  model_compression_toolkit/trainable_infrastructure/pytorch/quantizer_utils.py,sha256=1yOXKghUYfw2hmzbqTuNagIXBoM-wR2bP-ul66-mnDw,7767
- model_compression_toolkit/trainable_infrastructure/pytorch/util.py,sha256=oKuWi7E07a8zv5x9auhBugYE2RUQ7ojDh2XCs5koYJY,1090
  model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/__init__.py,sha256=73CXhqqNTvDpsvlJXclrGJq-vsCUYCI64ILu1y2mtvw,1056
  model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/base_activation_quantizer.py,sha256=X6E6mewWQot_aAkz3UxW5X0-Fjl_aMMjs3A-Af5eL6w,972
  model_compression_toolkit/trainable_infrastructure/pytorch/activation_quantizers/lsq/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
@@ -557,16 +558,20 @@ tests_pytest/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/keras/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/keras/core/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/keras/core/test_data_util.py,sha256=XSoPu_ci1xy2EtK-3OWGpESr-Meg1GDaxuSvcj3yt-w,3915
+ tests_pytest/keras/gptq/__init__.py,sha256=pKAdbTCFM_2BrZXUtTIw0ouKotrWwUDF_hP3rPwCM2k,696
+ tests_pytest/keras/gptq/test_gradual_act_quantization.py,sha256=iwKaLI7QQ8H3qj6zmwwfd2ZOwRcCr8T-v_4llSh_chM,4804
+ tests_pytest/keras/trainable_infrastructure/__init__.py,sha256=DvaMXJtJZHAqOm96NdfBiNQsbN2sc9bG2kkyY-mpPh8,710
+ tests_pytest/keras/trainable_infrastructure/test_linear_annealing.py,sha256=dZjrMHVIiEVRNDYR3a4lZaXF2ElxFx32KAXXQvDz-v8,1793
  tests_pytest/pytorch/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/core/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/core/test_data_util.py,sha256=Bg3c21YVfXE1SAUlTao553gXcITTKF4CPeKtl3peBTE,5604
  tests_pytest/pytorch/gptq/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/gptq/test_annealing_cfg.py,sha256=hGC7L6mp3N1ygcJ3OctgS_Fz2JY75q5aswolJkbHkZM,2208
- tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=tI01aFIUaiCILL5Qn--p1E_rLBUelxLdSY3k52lwcx0,4594
+ tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=Dg2cg1X8u9Jxm7Y6tlZIGH81EPoW_vYorcdDExdj02w,4630
  tests_pytest/pytorch/trainable_infrastructure/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
- tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=eNOpSp0GoLxtEdiRypBp8jaujXfdNxBwKh5Rd-P7WLs,1786
- mct_nightly-2.2.0.20241025.505.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
- mct_nightly-2.2.0.20241025.505.dist-info/METADATA,sha256=Sp107NNGeo7gOmqCwchsuQSnMQ5UR5VkkioWUUCgsak,20830
- mct_nightly-2.2.0.20241025.505.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- mct_nightly-2.2.0.20241025.505.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
- mct_nightly-2.2.0.20241025.505.dist-info/RECORD,,
+ tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=zErt9tOu7oupjpv08cvd1Cxvdk9qvP7GMUP6EhefK0c,1814
+ mct_nightly-2.2.0.20241027.532.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ mct_nightly-2.2.0.20241027.532.dist-info/METADATA,sha256=F0SYwH78ncR2_2Cx8EnUV0f_eydBxIdBjGnOd22oxqM,20830
+ mct_nightly-2.2.0.20241027.532.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ mct_nightly-2.2.0.20241027.532.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
+ mct_nightly-2.2.0.20241027.532.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
  from model_compression_toolkit import pruning
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model

- __version__ = "2.2.0.20241025.000505"
+ __version__ = "2.2.0.20241027.000532"
@@ -22,4 +22,11 @@ SOFT_ROUNDING_ZETA = 1.1

  # GPTQ config constant
  QUANT_PARAM_LEARNING_STR = 'quantization_parameter_learning'
- MAX_LSB_STR = 'max_lsbs_change_map'
+ MAX_LSB_STR = 'max_lsbs_change_map'
+
+ # GPTQ learning hyperparameters
+ LR_DEFAULT = 3e-2
+ LR_REST_DEFAULT = 1e-4
+ LR_BIAS_DEFAULT = 1e-3
+ LR_QUANTIZATION_PARAM_DEFAULT = 1e-3
+ GPTQ_MOMENTUM = 0.9
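The GPTQ learning-rate and momentum defaults now live in gptq_constants and are shared by both framework facades. A minimal sketch of how a facade is expected to build its optimizers from them, based on the Keras facade hunk later in this diff (the TensorFlow import is assumed available):

    import tensorflow as tf
    from model_compression_toolkit.gptq.common.gptq_constants import (
        LR_DEFAULT, LR_REST_DEFAULT, LR_BIAS_DEFAULT, GPTQ_MOMENTUM)

    # Defaults are resolved inside the function body instead of being evaluated in the signature.
    optimizer = tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT)
    optimizer_rest = tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT)
    bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT, momentum=GPTQ_MOMENTUM)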
@@ -13,23 +13,23 @@
  # limitations under the License.
  # ==============================================================================
  from functools import partial
- from typing import Callable
+ from typing import Callable, Any

  from model_compression_toolkit.gptq import GradientPTQConfig, QFractionLinearAnnealingConfig
- from model_compression_toolkit.trainable_infrastructure import BasePytorchTrainableQuantizer
-
- from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import LinearAnnealingScheduler
+ from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import BaseTrainableQuantizer


  def get_gradual_activation_quantizer_wrapper_factory(gptq_config: GradientPTQConfig,
- get_total_grad_steps_fn: Callable[[], int]) \
- -> Callable[[BasePytorchTrainableQuantizer], 'GradualActivationQuantizerWrapper']:
+ get_total_grad_steps_fn: Callable[[], int],
+ fw_linear_annealing_scheduler: type) \
+ -> Callable[[Any], 'GradualActivationQuantizerWrapper']:
  """
  Get a factory for 'GradualActivationQuantizerWrapper'.

  Args:
  gptq_config: GPTQ configuration.
  get_total_grad_steps_fn: a callable to obtain the total expected number of gradient steps.
+ fw_linear_annealing_scheduler: LinearAnnealingScheduler implementation of the framework (tf/pytorch).

  Returns:
  A factory function to build 'GradualActivationQuantizerWrapper' from Quantizer.
@@ -40,9 +40,9 @@ def get_gradual_activation_quantizer_wrapper_factory(gptq_config: GradientPTQCon
  annealing_cfg = gptq_config.gradual_activation_quantization_config.q_fraction_scheduler_policy
  if isinstance(annealing_cfg, QFractionLinearAnnealingConfig):
  t_end = annealing_cfg.end_step or get_total_grad_steps_fn()
- factor_scheduler = LinearAnnealingScheduler(t_start=annealing_cfg.start_step, t_end=t_end,
- initial_val=annealing_cfg.initial_q_fraction,
- target_val=annealing_cfg.target_q_fraction)
+ factor_scheduler = fw_linear_annealing_scheduler(t_start=annealing_cfg.start_step, t_end=t_end,
+ initial_val=annealing_cfg.initial_q_fraction,
+ target_val=annealing_cfg.target_q_fraction)
  else:
  raise ValueError(f'Unknown annealing policy {annealing_cfg}')

@@ -64,7 +64,7 @@ class GradualActivationQuantizerWrapper:
  quantizer: quantizer to wrap.
  q_fraction_scheduler: a callable that accepts a gradient step and returns the corresponding quantized fraction.
  """
- def __init__(self, quantizer: BasePytorchTrainableQuantizer, q_fraction_scheduler: Callable[[int], float]):
+ def __init__(self, quantizer: BaseTrainableQuantizer, q_fraction_scheduler: Callable[[int], float]):
  self.quantizer = quantizer
  self.q_fraction_scheduler = q_fraction_scheduler
  self.step_cnt = 0
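The factory above is framework-agnostic; each trainer now passes in its own LinearAnnealingScheduler implementation. A minimal usage sketch based on the Keras trainer hunk later in this diff (gptq_config, _get_total_grad_steps and activation_quantizer stand in for objects the trainer already holds):

    from model_compression_toolkit.gptq.common.gradual_activation_quantization import \
        get_gradual_activation_quantizer_wrapper_factory
    from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import \
        KerasLinearAnnealingScheduler

    factory = get_gradual_activation_quantizer_wrapper_factory(
        gptq_config, _get_total_grad_steps, KerasLinearAnnealingScheduler)
    # Each activation quantizer is wrapped so the quantized fraction of its output grows over training.
    wrapped_quantizer = factory(activation_quantizer)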
@@ -12,17 +12,20 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Callable

- from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig
- from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import \
- SoftQuantizerRegularization
- from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import LinearAnnealingScheduler
+ from tqdm import tqdm
+ from typing import Callable, Type

+ from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig

+ # Common warmup fraction
  WARMUP_STEP_FRACTION = 0.2

- def get_regularization(gptq_config: GradientPTQConfig, get_total_grad_steps_fn: Callable[[], int]) -> Callable:
+
+ def get_regularization(gptq_config: GradientPTQConfig,
+ get_total_grad_steps_fn: Callable[[], int],
+ SoftQuantizerRegularizationFWClass: Type,
+ LinearAnnealingSchedulerFWClass: Type) -> Callable:
  """
  Returns a function that computes the regularization term for GPTQ training based on the given
  rounding type in the GPTQ configuration.
@@ -30,15 +33,26 @@ get_regularization(gptq_config: GradientPTQConfig, get_total_grad_steps_fn:
  Args:
  gptq_config: A GPTQ configuration.
  get_total_grad_steps_fn: a callable to obtain the total expected number of gradient steps.
+ SoftQuantizerRegularizationFWClass: The class to use for soft quantizer regularization (framework-specific).
+ LinearAnnealingSchedulerFWClass: The class to use for the annealing scheduler (framework-specific).

- Returns: A function for computing the regularization. If there is no regularization function defined for the given
- rounding type, then it returns a function that just returns 0.
-
+ Returns:
+ Callable: A function for computing the regularization. If there is no regularization function
+ defined for the given rounding type, then it returns a function that just returns 0.
  """
  if gptq_config.rounding_type == RoundingType.SoftQuantizer:
  total_gradient_steps = get_total_grad_steps_fn()
  t_start = int(WARMUP_STEP_FRACTION * total_gradient_steps)
- scheduler = LinearAnnealingScheduler(t_start=t_start, t_end=total_gradient_steps, initial_val=20, target_val=2)
- return SoftQuantizerRegularization(scheduler)
+
+ # Directly initializing the scheduler within the method
+ scheduler = LinearAnnealingSchedulerFWClass(
+ t_start=t_start,
+ t_end=total_gradient_steps,
+ initial_val=20,
+ target_val=2
+ )
+
+ # Return the framework-specific soft quantizer regularization
+ return SoftQuantizerRegularizationFWClass(scheduler)
  else:
  return lambda *args, **kwargs: 0
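get_regularization is likewise framework-agnostic and receives the framework's regularization and scheduler classes. A short sketch of the Keras call, taken from the gptq_training hunk below (gptq_config and _get_total_grad_steps stand in for trainer state):

    from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
    from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import \
        SoftQuantizerRegularization
    from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import \
        KerasLinearAnnealingScheduler

    # Returns SoftQuantizerRegularization(scheduler) for SoftQuantizer rounding, otherwise a no-op.
    reg_func = get_regularization(gptq_config, _get_total_grad_steps,
                                  SoftQuantizerRegularization, KerasLinearAnnealingScheduler)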
@@ -26,9 +26,14 @@ from model_compression_toolkit.core.common.user_info import UserInformation
  from model_compression_toolkit.core.keras.back2framework.keras_model_builder import KerasModelBuilder
  from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader
  from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
+ from model_compression_toolkit.gptq.common.gradual_activation_quantization import \
+ get_gradual_activation_quantizer_wrapper_factory
+ from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
  from model_compression_toolkit.gptq.keras.quantizer.quantization_builder import quantization_builder
  from model_compression_toolkit.logger import Logger
  from mct_quantizers import KerasActivationQuantizationHolder
+ from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
+ from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import KerasLinearAnnealingScheduler

  if version.parse(tf.__version__) >= version.parse("2.13"):
  from keras.src.engine.base_layer import TensorFlowOpLayer
@@ -41,13 +46,12 @@ from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
  from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
  from model_compression_toolkit.core.common import Graph
  from model_compression_toolkit.gptq.keras.graph_info import get_weights_for_loss, get_gptq_trainable_parameters
- from model_compression_toolkit.gptq.keras.quantizer.regularization_factory import get_regularization
  from model_compression_toolkit.core.common.framework_info import FrameworkInfo
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
  import numpy as np
  import copy
  from model_compression_toolkit.core.keras.constants import BIAS, USE_BIAS
-
+ from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization

  class KerasGPTQTrainer(GPTQTrainer):
  """
@@ -78,6 +82,15 @@ class KerasGPTQTrainer(GPTQTrainer):
  hessian_info_service: HessianScoresService for fetching and computing Hessian's approximation scores.

  """
+
+ def _get_total_grad_steps():
+ return get_total_grad_steps(representative_data_gen) * gptq_config.n_epochs
+
+ # This must be set before the model building (as it is required for activation holder construction),
+ # which occurs in the base constructor.
+ self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
+ gptq_config, _get_total_grad_steps, KerasLinearAnnealingScheduler)
+
  super().__init__(graph_float,
  graph_quant,
  gptq_config,
@@ -119,7 +132,10 @@ class KerasGPTQTrainer(GPTQTrainer):

  self.weights_for_average_loss = self._get_compare_points_loss_weights()

- self.reg_func = get_regularization(self.gptq_config, representative_data_gen)
+ self.reg_func = get_regularization(self.gptq_config,
+ _get_total_grad_steps,
+ SoftQuantizerRegularization,
+ KerasLinearAnnealingScheduler)

  def _get_compare_points_loss_weights(self):
  """ Get compare points weights for the distillation loss. """
@@ -185,14 +201,13 @@ class KerasGPTQTrainer(GPTQTrainer):
  _, activation_quantizers = quantization_builder(n, self.gptq_config) # TODO: split quantizers building into two functions: for weights and activations

  # Holder by definition uses a single quantizer for the activation quantization
- # thus we make sure this is the only possible case (unless it's a node with no activation
- # quantization, which in this case has an empty list).
- if len(activation_quantizers) == 1:
- return KerasActivationQuantizationHolder(activation_quantizers[0])
-
- Logger.critical(f"'KerasActivationQuantizationHolder' is designed to support a single quantizer, "
- f"but {len(activation_quantizers)} quantizers were found for node '{n}'. "
- f"Ensure only one quantizer is configured for each node's activation.")
+ # thus we make sure this is the only possible case.
+ if len(activation_quantizers) != 1:
+ Logger.critical(f"'KerasActivationQuantizationHolder' is designed to support a single quantizer, "
+ f"but {len(activation_quantizers)} quantizers were found for node '{n}'. "
+ f"Ensure only one quantizer is configured for each node's activation.")
+ quantizer = self.gradual_act_quantizer_wrapper_factory(activation_quantizers[0])
+ return KerasActivationQuantizationHolder(quantizer)

  def build_gptq_model(self) -> Tuple[Model, UserInformation]:
  """
@@ -14,17 +14,18 @@
  # ==============================================================================
  import copy

- from typing import Callable, Tuple
+ from typing import Callable, Tuple, Union
  from packaging import version

- from model_compression_toolkit.core.common.quantization.quantize_graph_weights import quantize_graph_weights
  from model_compression_toolkit.core.common.visualization.tensorboard_writer import init_tensorboard_writer
- from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
+ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR_DEFAULT, LR_REST_DEFAULT, \
+ LR_BIAS_DEFAULT, GPTQ_MOMENTUM
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.constants import TENSORFLOW, ACT_HESSIAN_DEFAULT_BATCH_SIZE
  from model_compression_toolkit.verify_packages import FOUND_TF
  from model_compression_toolkit.core.common.user_info import UserInformation
- from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GPTQHessianScoresConfig
+ from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig, GPTQHessianScoresConfig, \
+ GradualActivationQuantizationConfig
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization
  from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
  from model_compression_toolkit.core import CoreConfig
@@ -32,13 +33,8 @@ from model_compression_toolkit.core.runner import core_runner
  from model_compression_toolkit.gptq.runner import gptq_runner
  from model_compression_toolkit.core.analyzer import analyzer_model_quantization
  from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
- from model_compression_toolkit.metadata import get_versions_dict, create_model_metadata
+ from model_compression_toolkit.metadata import create_model_metadata

- LR_DEFAULT = 0.15
- LR_REST_DEFAULT = 1e-4
- LR_BIAS_DEFAULT = 1e-4
- LR_QUANTIZATION_PARAM_DEFAULT = 1e-3
- GPTQ_MOMENTUM = 0.9

  if FOUND_TF:
  import tensorflow as tf
@@ -54,25 +50,25 @@ if FOUND_TF:

  # As from TF2.9 optimizers package is changed
  if version.parse(tf.__version__) < version.parse("2.9"):
- from keras.optimizer_v2.optimizer_v2 import OptimizerV2
+ from keras.optimizer_v2.optimizer_v2 import OptimizerV2 # pragma: no cover
  elif version.parse(tf.__version__) < version.parse("2.12"):
- from keras.optimizers.optimizer_v2.optimizer_v2 import OptimizerV2
+ from keras.optimizers.optimizer_v2.optimizer_v2 import OptimizerV2 # pragma: no cover
  else:
  from tensorflow.python.keras.optimizer_v2.optimizer_v2 import OptimizerV2

  DEFAULT_KERAS_TPC = get_target_platform_capabilities(TENSORFLOW, DEFAULT_TP_MODEL)

-
  def get_keras_gptq_config(n_epochs: int,
- optimizer: OptimizerV2 = tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT),
- optimizer_rest: OptimizerV2 = tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT),
+ optimizer: OptimizerV2 = None,
+ optimizer_rest: OptimizerV2 = None,
  loss: Callable = GPTQMultipleTensorsLoss(),
  log_function: Callable = None,
  use_hessian_based_weights: bool = True,
  regularization_factor: float = REG_DEFAULT,
- hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE) -> GradientPTQConfig:
+ hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE,
+ gradual_activation_quantization: Union[bool, GradualActivationQuantizationConfig] = False) -> GradientPTQConfig:
  """
- Create a GradientPTQConfigV2 instance for Keras models.
+ Create a GradientPTQConfig instance for Keras models.

  args:
  n_epochs (int): Number of epochs for running the representative dataset for fine-tuning.
@@ -83,9 +79,10 @@ if FOUND_TF:
  use_hessian_based_weights (bool): Whether to use Hessian-based weights for weighted average loss.
  regularization_factor (float): A floating point number that defines the regularization factor.
  hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
+ gradual_activation_quantization (bool, GradualActivationQuantizationConfig): If False, GradualActivationQuantization is disabled. If True, GradualActivationQuantization is enabled with the default settings. GradualActivationQuantizationConfig object can be passed to use non-default settings.

  returns:
- a GradientPTQConfigV2 object to use when fine-tuning the quantized model using gptq.
+ a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.

  Examples:

@@ -94,7 +91,7 @@ if FOUND_TF:
  >>> import model_compression_toolkit as mct
  >>> import tensorflow as tf

- Create a GradientPTQConfigV2 to run for 5 epochs:
+ Create a GradientPTQConfig to run for 5 epochs:

  >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=5)

@@ -102,11 +99,24 @@ if FOUND_TF:

  >>> gptq_conf = mct.gptq.get_keras_gptq_config(n_epochs=3, optimizer=tf.keras.optimizers.Nadam())

- The configuration can be passed to :func:`~model_compression_toolkit.keras_post_training_quantization` in order to quantize a keras model using gptq.
+ The configuration can be passed to :func:`~model_compression_toolkit.keras_gradient_post_training_quantization` in order to quantize a keras model using gptq.
+

  """
+ optimizer = optimizer or tf.keras.optimizers.Adam(learning_rate=LR_DEFAULT)
+ optimizer_rest = optimizer_rest or tf.keras.optimizers.Adam(learning_rate=LR_REST_DEFAULT)
+
  bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT,
  momentum=GPTQ_MOMENTUM)
+
+ if isinstance(gradual_activation_quantization, bool):
+ gradual_quant_config = GradualActivationQuantizationConfig() if gradual_activation_quantization else None
+ elif isinstance(gradual_activation_quantization, GradualActivationQuantizationConfig):
+ gradual_quant_config = gradual_activation_quantization
+ else:
+ raise TypeError(f'gradual_activation_quantization argument should be bool or '
+ f'GradualActivationQuantizationConfig, received {type(gradual_activation_quantization)}')
+
  return GradientPTQConfig(n_epochs,
  optimizer,
  optimizer_rest=optimizer_rest,
@@ -116,7 +126,8 @@ if FOUND_TF:
  optimizer_bias=bias_optimizer,
  use_hessian_based_weights=use_hessian_based_weights,
  regularization_factor=regularization_factor,
- hessian_weights_config=GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size))
+ hessian_weights_config=GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size),
+ gradual_activation_quantization_config=gradual_quant_config)


  def keras_gradient_post_training_quantization(in_model: Model, representative_data_gen: Callable,
@@ -251,13 +262,13 @@ if FOUND_TF:
  else:
  # If tensorflow is not installed,
  # we raise an exception when trying to use these functions.
- def get_keras_gptq_config(*args, **kwargs):
+ def get_keras_gptq_config(*args, **kwargs): # pragma: no cover
  Logger.critical("Tensorflow must be installed with a version of 2.15 or lower to use "
  "get_keras_gptq_config. The 'tensorflow' package is missing or is "
  "installed with a version higher than 2.15.") # pragma: no cover


- def keras_gradient_post_training_quantization(*args, **kwargs):
+ def keras_gradient_post_training_quantization(*args, **kwargs): # pragma: no cover
  Logger.critical("Tensorflow must be installed with a version of 2.15 or lower to use "
  "keras_gradient_post_training_quantization. The 'tensorflow' package is missing or is "
- "installed with a version higher than 2.15.") # pragma: no cover
+ "installed with a version higher than 2.15.")
@@ -16,18 +16,18 @@ from typing import Dict, List, Tuple

  from model_compression_toolkit.gptq import GradientPTQConfig
  from model_compression_toolkit.core import common
- from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
- get_inferable_quantizer_kwargs
  from model_compression_toolkit.gptq.keras.quantizer.base_keras_gptq_quantizer import BaseKerasGPTQTrainableQuantizer
  from mct_quantizers import QuantizationTarget
- from mct_quantizers.common.get_quantizers import get_inferable_quantizer_class
  from mct_quantizers.keras.quantizers import BaseKerasInferableQuantizer

  from model_compression_toolkit.logger import Logger
+ from model_compression_toolkit.trainable_infrastructure import TrainingMethod
  from model_compression_toolkit.trainable_infrastructure.common.get_quantizer_config import \
- get_trainable_quantizer_weights_config
+ get_trainable_quantizer_weights_config, get_trainable_quantizer_activation_config
  from model_compression_toolkit.trainable_infrastructure.common.get_quantizers import \
  get_trainable_quantizer_class
+ from model_compression_toolkit.trainable_infrastructure.keras.activation_quantizers.base_activation_quantizer import \
+ BaseKerasActivationTrainableQuantizer


  def quantization_builder(n: common.BaseNode,
@@ -70,12 +70,13 @@ def quantization_builder(n: common.BaseNode,

  quant_method = n.final_activation_quantization_cfg.activation_quantization_method

- quantizer_class = get_inferable_quantizer_class(quant_target=QuantizationTarget.Activation,
+ quantizer_class = get_trainable_quantizer_class(quant_target=QuantizationTarget.Activation,
+ quantizer_id=TrainingMethod.STE,
  quant_method=quant_method,
- quantizer_base_class=BaseKerasInferableQuantizer)
+ quantizer_base_class=BaseKerasActivationTrainableQuantizer)
+ cfg = get_trainable_quantizer_activation_config(n, None)

- kwargs = get_inferable_quantizer_kwargs(n.final_activation_quantization_cfg, QuantizationTarget.Activation)
-
- activation_quantizers.append(quantizer_class(**kwargs))
+ # freeze_quant_params is True since in GPTQ the activation quantization parameters should not be trained.
+ activation_quantizers.append(quantizer_class(cfg, freeze_quant_params=True))

  return weights_quantizers, activation_quantizers
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import List
+ from typing import List, Callable

  import tensorflow as tf
  from keras import Model
@@ -22,61 +22,21 @@ from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribut
  from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper


- class LinearTempDecay:
- """
- Annealing process for the soft quantizer regularization temperature term.
- """
-
- def __init__(self, t_max: int, rel_start_decay: float = 0.2, start_b: int = 20, end_b: int = 2):
- """
- Initializes a LinearTempDecay object.
-
- Args:
- t_max: maximal time step.
- rel_start_decay: Decay step size at the beginning of the process.
- start_b: Starting value of the regularization term.
- end_b: Target value of the regularization term.
- """
-
- self.t_max = t_max
- self.start_decay = rel_start_decay * t_max
- self.start_b = start_b
- self.end_b = end_b
-
- def __call__(self, t: int) -> float:
- """
- Cosine annealing scheduler for soft quantizer regularization temperature term.
-
- Args:
- t: The current time step.
-
- Returns: Scheduled temperature.
- """
-
- is_before_start_decay = tf.cast(t < self.start_decay, tf.float32)
-
- rel_t = (t - self.start_decay) / (self.t_max - self.start_decay)
-
- return self.start_b * is_before_start_decay + \
- (1 - is_before_start_decay) * \
- (self.end_b + (self.start_b - self.end_b) * tf.math.maximum(0.0, (1 - rel_t)))
-

  class SoftQuantizerRegularization:
  """
  A class to handle the computation of soft quantizer regularization for GPTQ training.
  """

- def __init__(self, total_gradient_steps: int):
+ def __init__(self, beta_scheduler: Callable[[int], float]):
  """
  Initializes the regularization computation object with a LinearDecay object.

  Args:
- total_gradient_steps: The number of gradient steps during optimization.
+ beta_scheduler: a callable that accepts current time step and returns a corresponding beta value.
  """
  # Initializing the temperature decay according to the number of expected gradient steps
- self.linear_decay = LinearTempDecay(total_gradient_steps)
-
+ self.beta_scheduler = beta_scheduler
  self.count_iter = tf.Variable(0.)

@@ -91,7 +51,7 @@ class SoftQuantizerRegularization:
  Returns: Regularization value.
  """
  soft_reg_aux: List[tf.Tensor] = []
- b = self.linear_decay(self.count_iter.value())
+ b = self.beta_scheduler(self.count_iter.value())
  for layer in model.layers:
  if isinstance(layer, KerasTrainableQuantizationWrapper):
  kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
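SoftQuantizerRegularization no longer builds its own LinearTempDecay; the beta schedule is injected. A minimal sketch of how the regularization factory above constructs it (the total-step value is a placeholder; 0.2, 20 and 2 come from the regularization_factory hunk):

    from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import \
        SoftQuantizerRegularization
    from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import \
        KerasLinearAnnealingScheduler

    total_gradient_steps = 1000                      # placeholder for illustration
    t_start = int(0.2 * total_gradient_steps)        # WARMUP_STEP_FRACTION

    scheduler = KerasLinearAnnealingScheduler(t_start=t_start, t_end=total_gradient_steps,
                                              initial_val=20, target_val=2)
    reg = SoftQuantizerRegularization(scheduler)     # beta is now read from the injected scheduler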
@@ -17,15 +17,18 @@ from typing import Callable, List, Tuple, Union, Generator

  import numpy as np
  import torch
- from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
  from torch.nn import Module
  from torch.utils.data import DataLoader
  from tqdm import tqdm

+ from model_compression_toolkit.gptq.common.gradual_activation_quantization import get_gradual_activation_quantizer_wrapper_factory
+ from model_compression_toolkit.gptq.common.regularization_factory import get_regularization
+
  from model_compression_toolkit.core.common import Graph, BaseNode
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
  from model_compression_toolkit.core.common.framework_info import FrameworkInfo
  from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresGranularity
+
  from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
  from model_compression_toolkit.core.pytorch.constants import BIAS
  from model_compression_toolkit.core.pytorch.data_util import FixedDatasetFromGenerator, IterableDatasetFromGenerator, \
@@ -34,14 +37,15 @@ from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, set_mo
  from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
  from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
  from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
- from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable_parameters, \
- get_weights_for_loss
- from model_compression_toolkit.gptq.pytorch.quantizer.gradual_activation_quantization import \
- get_gradual_activation_quantizer_wrapper_factory
+ from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable_parameters, get_weights_for_loss
  from model_compression_toolkit.gptq.pytorch.quantizer.quantization_builder import quantization_builder
- from model_compression_toolkit.gptq.pytorch.quantizer.regularization_factory import get_regularization
+
+ from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
+ from model_compression_toolkit.trainable_infrastructure.common.util import get_total_grad_steps
+ from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
+ from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import SoftQuantizerRegularization as PytorchSoftQuantizerRegularization
+
  from model_compression_toolkit.logger import Logger
- from model_compression_toolkit.trainable_infrastructure.pytorch.util import get_total_grad_steps


  class PytorchGPTQTrainer(GPTQTrainer):
@@ -78,7 +82,7 @@ class PytorchGPTQTrainer(GPTQTrainer):

  # must be set prior to model building in the base class constructor
  self.gradual_act_quantizer_wrapper_factory = get_gradual_activation_quantizer_wrapper_factory(
- gptq_config, _get_total_grad_steps)
+ gptq_config, _get_total_grad_steps, PytorchLinearAnnealingScheduler)

  super().__init__(graph_float,
  graph_quant,
@@ -121,7 +125,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
  else:
  self.train_dataloader = self._prepare_train_dataloader_for_non_sla(representative_data_gen)

- self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps)
+ self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps, PytorchSoftQuantizerRegularization, PytorchLinearAnnealingScheduler)

  def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader:
  """
@@ -26,18 +26,15 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
  from model_compression_toolkit.core.runner import core_runner
  from model_compression_toolkit.gptq.common.gptq_config import (
  GradientPTQConfig, GPTQHessianScoresConfig, GradualActivationQuantizationConfig)
- from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
- from model_compression_toolkit.gptq.keras.quantization_facade import GPTQ_MOMENTUM
+ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT, LR_DEFAULT, LR_REST_DEFAULT, \
+ LR_BIAS_DEFAULT, GPTQ_MOMENTUM
  from model_compression_toolkit.gptq.runner import gptq_runner
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.metadata import create_model_metadata
  from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
  from model_compression_toolkit.verify_packages import FOUND_TORCH

- LR_DEFAULT = 1e-4
- LR_REST_DEFAULT = 1e-4
- LR_BIAS_DEFAULT = 1e-4
- LR_QUANTIZATION_PARAM_DEFAULT = 1e-4
+

  if FOUND_TORCH:
  from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
@@ -76,10 +73,6 @@ if FOUND_TORCH:
  regularization_factor (float): A floating point number that defines the regularization factor.
  hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
  use_hessian_sample_attention (bool): whether to use Sample-Layer Attention score for weighted loss.
- gradual_activation_quantization (bool, GradualActivationQuantizationConfig):
- If False, GradualActivationQuantization is disabled.
- If True, GradualActivationQuantization is enabled with the default settings.
- GradualActivationQuantizationConfig object can be passed to use non-default settings.

  returns:
  a GradientPTQConfig object to use when fine-tuning the quantized model using gptq.
@@ -96,9 +89,6 @@ if FOUND_TORCH:
  >>> import torch
  >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, optimizer=torch.optim.Adam([torch.Tensor(1)]))

- To enable Gradual Activation Quantization with non-default settings build GradualActivationQuantizationConfig:
- >>> gradual_act_conf = mct.gptq.GradualActivationQuantizationConfig(mct.gptq.QFractionLinearAnnealingConfig(initial_q_fraction=0.2))
- >>> gptq_conf = mct.gptq.get_pytorch_gptq_config(n_epochs=3, gradual_activation_quantization=gradual_act_conf)
  The configuration can be passed to :func:`~model_compression_toolkit.pytorch_gradient_post_training_quantization` in order to quantize a pytorch model using gptq.

  """
@@ -18,7 +18,6 @@ from typing import Union
  import numpy as np
  import tensorflow as tf
  from tensorflow.python.framework.tensor_shape import TensorShape
- from model_compression_toolkit.constants import SIGNED

  from model_compression_toolkit.trainable_infrastructure import TrainingMethod

@@ -29,10 +28,9 @@ from model_compression_toolkit.qat.common import THRESHOLD_TENSOR
  from model_compression_toolkit import constants as C

  from model_compression_toolkit.qat.keras.quantizer.base_keras_qat_weight_quantizer import BaseKerasQATWeightTrainableQuantizer
- from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig
- from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer, ActivationPOTInferableQuantizer, ActivationSymmetricInferableQuantizer
+ from model_compression_toolkit.trainable_infrastructure import TrainableQuantizerWeightsConfig
+ from mct_quantizers.keras.quantizers import WeightsPOTInferableQuantizer, WeightsSymmetricInferableQuantizer
  from model_compression_toolkit.trainable_infrastructure.common.base_trainable_quantizer import VariableGroup
- from model_compression_toolkit.qat.keras.quantizer.quant_utils import ste_round, grad_scale
  from model_compression_toolkit.trainable_infrastructure.keras.quantizer_utils import symmetric_lsq_quantizer

@@ -0,0 +1,68 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ from abc import abstractmethod
+
+
+ class BaseLinearAnnealingScheduler:
+ def __init__(self, t_start: int, t_end: int, initial_val: float, target_val: float):
+ """
+ Base class for Linear annealing scheduler. Returns the corresponding annealed value per time step.
+
+ Args:
+ t_start: Time step to begin annealing.
+ t_end: Time step to complete annealing.
+ initial_val: Initial value before annealing.
+ target_val: Target value after annealing.
+
+ Raises:
+ ValueError: If t_start is not in the range [0, t_end).
+ """
+ if not (0 <= t_start < t_end):
+ raise ValueError(f'Expected 0 <= t_start < t_end, actual {t_end=} {t_start=}')
+
+ self.t_start = t_start
+ self.t_end = t_end
+ self.initial_val = initial_val
+ self.target_val = target_val
+
+ @abstractmethod
+ def _compute_factor(self, t: int) -> float:
+ """
+ Abstract method to compute the annealing factor based on time step `t`.
+
+ Args:
+ t: Current time step.
+
+ Returns:
+ float: Annealing factor, typically in the range [0, 1].
+
+ Raises:
+ NotImplementedError: If this method is not overridden in the subclass.
+ """
+ raise NotImplementedError("This method should be overridden in subclasses")
+
+ def __call__(self, t: int) -> float:
+ """
+ Calculates the annealed value based on the current time step `t`.
+
+ Args:
+ t: Current time step.
+
+ Returns:
+ float: Annealed value between initial_val and target_val.
+ """
+ factor = self._compute_factor(t)
+ return self.initial_val + factor * (self.target_val - self.initial_val)
+
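A small worked example of the base scheduler's arithmetic; the subclass here is hypothetical and for illustration only (the real subclasses below clip the factor with TensorFlow / torch ops):

    from model_compression_toolkit.trainable_infrastructure.common.annealing_schedulers import \
        BaseLinearAnnealingScheduler

    class FloatLinearAnnealingScheduler(BaseLinearAnnealingScheduler):
        # Illustrative subclass only: clamps the factor with plain Python.
        def _compute_factor(self, t: int) -> float:
            return min(max((t - self.t_start) / (self.t_end - self.t_start), 0.0), 1.0)

    sched = FloatLinearAnnealingScheduler(t_start=10, t_end=20, initial_val=20.0, target_val=2.0)
    assert sched(0) == 20.0    # before t_start the factor clamps to 0
    assert sched(15) == 11.0   # halfway: 20 + 0.5 * (2 - 20)
    assert sched(25) == 2.0    # after t_end the factor clamps to 1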
@@ -0,0 +1,32 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ import tensorflow as tf
+
+ from model_compression_toolkit.trainable_infrastructure.common.annealing_schedulers import BaseLinearAnnealingScheduler
+
+
+ class KerasLinearAnnealingScheduler(BaseLinearAnnealingScheduler):
+ def _compute_factor(self, t: int) -> float:
+ """
+ Computes the annealing factor for Keras models.
+
+ Args:
+ t: Current time step.
+
+ Returns:
+ float: Clipped annealing factor between 0 and 1.
+ """
+ factor = (t - self.t_start) / (self.t_end - self.t_start)
+ return tf.clip_by_value(factor, 0, 1)
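Usage matches the base class; a quick sketch (since the factor is clipped with tf.clip_by_value, the annealed value comes back as a TensorFlow scalar):

    from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import \
        KerasLinearAnnealingScheduler

    sched = KerasLinearAnnealingScheduler(t_start=0, t_end=100, initial_val=0.0, target_val=1.0)
    sched(50)   # ~0.5: this is the q_fraction handed to GradualActivationQuantizerWrapper
    sched(200)  # clips to 1.0 once annealing is complete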
@@ -13,27 +13,19 @@
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
15
  from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
16
+ from model_compression_toolkit.trainable_infrastructure.common.annealing_schedulers import BaseLinearAnnealingScheduler
16
17
 
17
18
 
18
- class LinearAnnealingScheduler:
19
- def __init__(self, t_start: int, t_end: int, initial_val: float, target_val: float):
19
+ class PytorchLinearAnnealingScheduler(BaseLinearAnnealingScheduler):
20
+ def _compute_factor(self, t: int) -> float:
20
21
  """
21
- Linear annealing scheduler. Returns the corresponding annealed value per time step.
22
+ Computes the annealing factor for torch models.
22
23
 
23
24
  Args:
24
- t_start: time step to begin annealing.
25
- t_end: time step to complete annealing.
26
- initial_val: initial value.
27
- target_val: target value.
28
- """
29
- if not (0 <= t_start < t_end):
30
- raise ValueError(f'Expected 0 <= t_start < t_end, actual {t_end=} {t_start=}')
31
-
32
- self.t_start = t_start
33
- self.t_end = t_end
34
- self.initial_val = initial_val
35
- self.target_val = target_val
25
+ t: Current time step.
36
26
 
37
- def __call__(self, t: int) -> float:
38
- factor = to_torch_tensor((t - self.t_start) / (self.t_end - self.t_start)).clip(0, 1)
39
- return self.initial_val + factor * (self.target_val - self.initial_val)
27
+ Returns:
28
+ float: Clipped annealing factor between 0 and 1.
29
+ """
30
+ factor = to_torch_tensor((t - self.t_start) / (self.t_end - self.t_start))
31
+ return factor.clip(0, 1)
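The PyTorch subclass is the same idea; `to_torch_tensor` places the factor on the working device, so the annealed value is a torch tensor. A brief, hypothetical usage sketch:

from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import \
    PytorchLinearAnnealingScheduler

# Ramp a quantized fraction from 0.0 up to 1.0 over gradient steps 10..50.
scheduler = PytorchLinearAnnealingScheduler(t_start=10, t_end=50, initial_val=0.0, target_val=1.0)
q_fraction = scheduler(30)   # torch tensor equal to 0.5, on the working device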
@@ -0,0 +1,14 @@
1
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
@@ -0,0 +1,102 @@
1
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ from unittest.mock import Mock
16
+ import pytest
17
+ import numpy as np
18
+ import tensorflow as tf
19
+
20
+ from model_compression_toolkit.gptq.common.gradual_activation_quantization import GradualActivationQuantizerWrapper, \
21
+ get_gradual_activation_quantizer_wrapper_factory
22
+ from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import KerasLinearAnnealingScheduler
23
+ from model_compression_toolkit.gptq import GradientPTQConfig, GradualActivationQuantizationConfig, QFractionLinearAnnealingConfig
24
+
25
+
26
+
27
+ @pytest.fixture
28
+ def x():
29
+ return tf.random.normal((2, 5, 6, 7), seed=42, dtype=tf.float32)
30
+
31
+
32
+ class Quantizer:
33
+ def __call__(self, x, training):
34
+ self.training = training
35
+ return 3 * x + 1
36
+
37
+
38
+ class TestGradualActivationQuantization:
39
+
40
+ def test_gradual_act_quant_wrapper(self, x):
41
+ quantizer = Quantizer()
42
+ qw = GradualActivationQuantizerWrapper(quantizer, q_fraction_scheduler=lambda t: t / (t + 1))
43
+
44
+ y0, y1, y2 = [qw(x, training=True) for _ in range(3)]
45
+ assert np.allclose(y0.numpy(), x.numpy()) # t=0
46
+ assert np.allclose(y1.numpy(), 0.5 * x.numpy() + (1.5 * x.numpy() + 0.5)) # t=1
47
+ assert np.allclose(y2.numpy(), x.numpy() / 3 + (2 * x.numpy() + 2 / 3)) # t=2
48
+ assert quantizer.training is True
49
+
50
+ _ = qw(x, training=False)
51
+ assert quantizer.training is False # correct flag was propagated
52
+
53
+ def test_factory_no_qdrop(self):
54
+ quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg=None, get_grad_steps_fn=None)
55
+ assert quantizer_wrapper is quantizer
56
+
57
+ @pytest.mark.parametrize('end_step', (20, None))
58
+ def test_factory_linear(self, x, end_step):
59
+ qdrop_cfg = GradualActivationQuantizationConfig(
60
+ QFractionLinearAnnealingConfig(initial_q_fraction=0.3, target_q_fraction=0.8, start_step=10,
61
+ end_step=end_step)
62
+ )
63
+
64
+ def get_total_steps():
65
+ if end_step is None:
66
+ return 50
67
+ assert False # should not be called if end_step is passed
68
+
69
+ quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg, get_total_steps)
70
+
71
+ scheduler = quantizer_wrapper.q_fraction_scheduler
72
+ assert isinstance(scheduler, KerasLinearAnnealingScheduler)
73
+ exp_end_step = 50 if end_step is None else end_step
74
+ assert scheduler.t_start == 10
75
+ assert scheduler.t_end == exp_end_step
76
+ assert scheduler.initial_val == 0.3
77
+ assert scheduler.target_val == 0.8
78
+
79
+ y = [quantizer_wrapper(x, training=True) for _ in range(exp_end_step + 1)]
80
+
81
+ assert np.allclose(y[9].numpy(), 0.7 * x.numpy() + 0.3 * quantizer(x, training=True).numpy())
82
+ assert np.allclose(y[10].numpy(), 0.7 * x.numpy() + 0.3 * quantizer(x, training=True).numpy())
83
+ assert np.allclose(y[-1].numpy(), 0.2 * x.numpy() + 0.8 * quantizer(x, training=True).numpy())
84
+
85
+ def test_factory_linear_common_case(self, x):
86
+ # validate that we actually implemented the right thing - on first call float input, on last call fully quantized
87
+ qdrop_cfg = GradualActivationQuantizationConfig(
88
+ QFractionLinearAnnealingConfig(initial_q_fraction=0, target_q_fraction=1, start_step=0, end_step=None)
89
+ )
90
+ quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg, lambda: 15)
91
+ y0, *_, y_last = [quantizer_wrapper(x, training=True) for _ in range(16)]
92
+ assert np.array_equal(y0.numpy(), x.numpy())
93
+ assert np.allclose(y_last.numpy(), quantizer(x, training=True).numpy())
94
+
95
+ def _run_factory_test(self, qdrop_cfg, get_grad_steps_fn):
96
+ # Mocks are used to just pass anything
97
+ gptq_cfg = GradientPTQConfig(n_epochs=5, optimizer=Mock(), loss=Mock(),
98
+ gradual_activation_quantization_config=qdrop_cfg)
99
+ factory = get_gradual_activation_quantizer_wrapper_factory(gptq_cfg, get_grad_steps_fn, KerasLinearAnnealingScheduler)
100
+ quantizer = Quantizer()
101
+ quantizer_wrapper = factory(quantizer)
102
+ return quantizer_wrapper, quantizer
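To summarize the relation the tests above exercise: on a training call at step t, the wrapper returns (1 - q_fraction(t)) * x + q_fraction(t) * quantizer(x), with q_fraction supplied by the annealing scheduler. The toy class below is a hypothetical stand-in (not the package's GradualActivationQuantizerWrapper) that captures just that blending behaviour under the assumption that the step counter advances on each training call.

class ToyGradualActivationWrapper:
    """Illustrative only; blends the float input with its quantized version per step."""

    def __init__(self, quantizer, q_fraction_scheduler):
        self.quantizer = quantizer
        self.q_fraction_scheduler = q_fraction_scheduler
        self.step = 0

    def __call__(self, x, training=True):
        # Annealed quantized fraction for the current step.
        q_fraction = self.q_fraction_scheduler(self.step)
        if training:
            self.step += 1
        return (1 - q_fraction) * x + q_fraction * self.quantizer(x, training)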
@@ -0,0 +1,16 @@
1
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+
@@ -0,0 +1,49 @@
1
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ import numpy as np
16
+ import pytest
17
+
18
+ from model_compression_toolkit.trainable_infrastructure.keras.annealing_schedulers import KerasLinearAnnealingScheduler
19
+
20
+
21
+ def test_linear_annealing():
22
+ scheduler = KerasLinearAnnealingScheduler(t_start=10, t_end=35, initial_val=3.4, target_val=-1.6)
23
+ for t in [0, 9, 10]:
24
+ assert _isclose(scheduler(t), 3.4)
25
+
26
+ for t in [35, 36, 1000]:
27
+ assert _isclose(scheduler(t), -1.6)
28
+
29
+ assert _isclose(scheduler(11), 3.2)
30
+ assert _isclose(scheduler(27), 0.)
31
+ assert _isclose(scheduler(34), -1.4)
32
+
33
+
34
+ def test_linear_annealing_ascending():
35
+ scheduler = KerasLinearAnnealingScheduler(t_start=0, t_end=5, initial_val=-0.5, target_val=1.5)
36
+ assert _isclose(scheduler(0), -0.5)
37
+ assert _isclose(scheduler(1), -0.1)
38
+ assert _isclose(scheduler(4), 1.1)
39
+ assert _isclose(scheduler(5), 1.5)
40
+
41
+
42
+ @pytest.mark.parametrize('start', [5, -1])
43
+ def test_invalid(start):
44
+ with pytest.raises(ValueError):
45
+ KerasLinearAnnealingScheduler(t_start=start, t_end=4, initial_val=1, target_val=0)
46
+
47
+
48
+ def _isclose(x, y):
49
+ return np.isclose(x, y)
@@ -18,9 +18,9 @@ import pytest
18
18
  import torch
19
19
 
20
20
  from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device
21
- from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import LinearAnnealingScheduler
21
+ from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
22
22
  from model_compression_toolkit.gptq import GradientPTQConfig, GradualActivationQuantizationConfig, QFractionLinearAnnealingConfig
23
- from model_compression_toolkit.gptq.pytorch.quantizer.gradual_activation_quantization import (
23
+ from model_compression_toolkit.gptq.common.gradual_activation_quantization import (
24
24
  GradualActivationQuantizerWrapper, get_gradual_activation_quantizer_wrapper_factory)
25
25
 
26
26
 
@@ -68,7 +68,7 @@ class TestGradualActivationQuantization:
68
68
  quantizer_wrapper, quantizer = self._run_factory_test(qdrop_cfg, get_total_steps)
69
69
 
70
70
  scheduler = quantizer_wrapper.q_fraction_scheduler
71
- assert isinstance(scheduler, LinearAnnealingScheduler)
71
+ assert isinstance(scheduler, PytorchLinearAnnealingScheduler)
72
72
  exp_end_step = 50 if end_step is None else end_step
73
73
  assert scheduler.t_start == 10
74
74
  assert scheduler.t_end == exp_end_step
@@ -94,7 +94,7 @@ class TestGradualActivationQuantization:
94
94
  # Mocks are used to just pass anything
95
95
  gptq_cfg = GradientPTQConfig(n_epochs=5, optimizer=Mock(), loss=Mock(),
96
96
  gradual_activation_quantization_config=qdrop_cfg)
97
- factory = get_gradual_activation_quantizer_wrapper_factory(gptq_cfg, get_grad_steps_fn)
97
+ factory = get_gradual_activation_quantizer_wrapper_factory(gptq_cfg, get_grad_steps_fn, PytorchLinearAnnealingScheduler)
98
98
  quantizer = Quantizer()
99
99
  quantizer_wrapper = factory(quantizer)
100
100
  return quantizer_wrapper, quantizer
@@ -15,11 +15,11 @@
15
15
  import torch
16
16
  import pytest
17
17
 
18
- from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import LinearAnnealingScheduler
18
+ from model_compression_toolkit.trainable_infrastructure.pytorch.annealing_schedulers import PytorchLinearAnnealingScheduler
19
19
 
20
20
 
21
21
  def test_linear_annealing():
22
- scheduler = LinearAnnealingScheduler(t_start=10, t_end=35, initial_val=3.4, target_val=-1.6)
22
+ scheduler = PytorchLinearAnnealingScheduler(t_start=10, t_end=35, initial_val=3.4, target_val=-1.6)
23
23
  for t in [0, 9, 10]:
24
24
  assert _isclose(scheduler(t), 3.4)
25
25
 
@@ -32,7 +32,7 @@ def test_linear_annealing():
32
32
 
33
33
 
34
34
  def test_linear_annealing_ascending():
35
- scheduler = LinearAnnealingScheduler(t_start=0, t_end=5, initial_val=-0.5, target_val=1.5)
35
+ scheduler = PytorchLinearAnnealingScheduler(t_start=0, t_end=5, initial_val=-0.5, target_val=1.5)
36
36
  assert _isclose(scheduler(0), -0.5)
37
37
  assert _isclose(scheduler(1), -0.1)
38
38
  assert _isclose(scheduler(4), 1.1)
@@ -42,7 +42,7 @@ def test_linear_annealing_ascending():
42
42
  @pytest.mark.parametrize('start', [5, -1])
43
43
  def test_invalid(start):
44
44
  with pytest.raises(ValueError):
45
- LinearAnnealingScheduler(t_start=start, t_end=4, initial_val=1, target_val=0)
45
+ PytorchLinearAnnealingScheduler(t_start=start, t_end=4, initial_val=1, target_val=0)
46
46
 
47
47
 
48
48
  def _isclose(x, y):
@@ -1,44 +0,0 @@
1
- # Copyright 2023 Sony Semiconductor Israel, Inc. All rights reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- from tqdm import tqdm
16
- from typing import Callable
17
-
18
- from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig, GradientPTQConfig
19
- from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import \
20
- SoftQuantizerRegularization
21
-
22
-
23
- def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen: Callable) -> Callable:
24
- """
25
- Returns a function that computes the regularization term for GPTQ training based on the given
26
- rounding type in the GPTQ configuration.
27
-
28
- Args:
29
- gptq_config: A GPTQ configuration.
30
- representative_data_gen: Dataset used for the GPTQ training.
31
-
32
- Returns: A function for computing the regularization. If there is no regularization function defined for the given
33
- rounding type, then it returns a function that just returns 0.
34
-
35
- """
36
- if gptq_config.rounding_type == RoundingType.SoftQuantizer:
37
- # dry run on the representative dataset to count number of batches
38
- num_batches = 0
39
- for _ in tqdm(representative_data_gen(), "GPTQ initialization"):
40
- num_batches += 1
41
-
42
- return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
43
- else:
44
- return lambda m, e_reg: 0