fusion-bench 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. fusion_bench/compat/method/__init__.py +2 -0
  2. fusion_bench/compat/method/base_algorithm.py +7 -2
  3. fusion_bench/compat/modelpool/__init__.py +3 -2
  4. fusion_bench/compat/taskpool/__init__.py +1 -1
  5. fusion_bench/dataset/arc_agi/__init__.py +6 -1
  6. fusion_bench/dataset/arc_agi/arc.py +26 -7
  7. fusion_bench/dataset/arc_agi/arc_agi.py +156 -25
  8. fusion_bench/dataset/arc_agi/np_cache.py +0 -1
  9. fusion_bench/dataset/arc_agi/preprocess.py +51 -9
  10. fusion_bench/dataset/llama/__init__.py +1 -0
  11. fusion_bench/dataset/llama/alpaca.py +93 -3
  12. fusion_bench/dataset/llama/collate.py +72 -5
  13. fusion_bench/dataset/llama/metamathqa.py +50 -0
  14. fusion_bench/dataset/llama/preference_700k.py +70 -0
  15. fusion_bench/dataset/llama/stanford_shp.py +90 -0
  16. fusion_bench/dataset/llama/ultrachat.py +58 -0
  17. fusion_bench/dataset/llama/utils/__init__.py +0 -0
  18. fusion_bench/method/__init__.py +4 -1
  19. fusion_bench/method/adamerging/__init__.py +1 -1
  20. fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -4
  21. fusion_bench/method/adamerging/min_norm_solvers.py +4 -4
  22. fusion_bench/method/linear/expo.py +39 -0
  23. fusion_bench/method/lm_finetune/__init__.py +1 -0
  24. fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
  25. fusion_bench/method/lm_finetune/fullfinetune_sft.py +122 -150
  26. fusion_bench/method/lm_finetune/peftfinetune_sft.py +102 -157
  27. fusion_bench/method/pruning/llama_magnitude_prune.py +2 -2
  28. fusion_bench/method/pruning/llama_random_prune.py +2 -2
  29. fusion_bench/method/pruning/magnitude_diff_pruning.py +2 -1
  30. fusion_bench/method/rankone_moe/__init__.py +3 -0
  31. fusion_bench/method/rankone_moe/clip_rankone_moe.py +160 -0
  32. fusion_bench/method/rankone_moe/rankone_moe.py +249 -0
  33. fusion_bench/method/simple_average.py +1 -1
  34. fusion_bench/method/surgery/__init__.py +3 -0
  35. fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
  36. fusion_bench/mixins/__init__.py +2 -0
  37. fusion_bench/mixins/clip_classification.py +60 -12
  38. fusion_bench/mixins/fabric_training.py +320 -0
  39. fusion_bench/mixins/lightning_fabric.py +11 -2
  40. fusion_bench/modelpool/__init__.py +2 -0
  41. fusion_bench/modelpool/causal_lm/__init__.py +1 -1
  42. fusion_bench/modelpool/causal_lm/causal_lm.py +21 -22
  43. fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
  44. fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
  45. fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
  46. fusion_bench/models/chat_templates/__init__.py +1 -0
  47. fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
  48. fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
  49. fusion_bench/models/hf_clip.py +50 -9
  50. fusion_bench/models/rankone_moe.py +410 -0
  51. fusion_bench/models/surgery/surgerymodelwrapper.py +157 -0
  52. fusion_bench/models/utils.py +8 -0
  53. fusion_bench/models/wrappers/layer_wise_fusion.py +14 -5
  54. fusion_bench/models/wrappers/task_wise_fusion.py +5 -5
  55. fusion_bench/optim/__init__.py +2 -0
  56. fusion_bench/optim/exception.py +47 -0
  57. fusion_bench/optim/lr_scheduler/__init__.py +1 -0
  58. fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
  59. fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
  60. fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
  61. fusion_bench/optim/mezo.py +0 -2
  62. fusion_bench/programs/fabric_fusion_program.py +5 -1
  63. fusion_bench/taskpool/__init__.py +10 -2
  64. fusion_bench/taskpool/clip_vision/__init__.py +1 -0
  65. fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +112 -0
  66. fusion_bench/taskpool/clip_vision/taskpool.py +43 -6
  67. fusion_bench/taskpool/llama/reward_model.py +157 -0
  68. fusion_bench/taskpool/nyuv2_taskpool.py +2 -0
  69. fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +2 -1
  70. fusion_bench/utils/hydra_utils.py +22 -0
  71. fusion_bench/utils/plot/__init__.py +0 -0
  72. fusion_bench/utils/plot/token.py +52 -0
  73. fusion_bench/utils/plot/token_notebook.py +127 -0
  74. fusion_bench/utils/type.py +5 -3
  75. {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/METADATA +1 -1
  76. {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/RECORD +104 -57
  77. fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
  78. fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
  79. fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
  80. fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
  81. fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
  82. fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
  83. fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
  84. fusion_bench_config/fabric_model_fusion.yaml +1 -1
  85. fusion_bench_config/llama_full_finetune.yaml +19 -0
  86. fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
  87. fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +13 -6
  88. fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +17 -9
  89. fusion_bench_config/method/rankone_moe/rankone_moe.yaml +26 -0
  90. fusion_bench_config/method/regmean/clip_regmean.yaml +1 -0
  91. fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
  92. fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
  93. fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
  94. fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
  95. fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
  96. fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
  97. fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
  98. fusion_bench_config/nyuv2_config.yaml +5 -1
  99. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +18 -0
  100. fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
  101. fusion_bench_config/llama_weighted_average.yaml +0 -26
  102. {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/LICENSE +0 -0
  103. {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/WHEEL +0 -0
  104. {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/entry_points.txt +0 -0
  105. {fusion_bench-0.2.5.dist-info → fusion_bench-0.2.7.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,12 @@
1
1
  fusion_bench/__init__.py,sha256=68dF-zPvb8E2MgYnmgIJsxIHJBy1MApKeOrRZvQEVlg,421
2
2
  fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- fusion_bench/compat/method/__init__.py,sha256=OmhrEUm6lNGJFoqock0N0YvxipIRXhBh09pqkxWKc9A,4743
4
- fusion_bench/compat/method/base_algorithm.py,sha256=ebkl8wkmjYpEDIm0pu2SeS17JMd_6b9G9FHp7ngfPHY,1768
3
+ fusion_bench/compat/method/__init__.py,sha256=KUKHpX7AfvB7fmOAlruWp0r1z17xpkI9l29PMvLWR9A,4956
4
+ fusion_bench/compat/method/base_algorithm.py,sha256=0BG_QUtFCbfK8OmiYfEw8xaOj_G0unpqXDEJKXTCNYk,1952
5
5
  fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py,sha256=m68BRGy4P-P9lLB10oXOBI-p58a-0FOPcrJ4r4MU32k,1100
6
- fusion_bench/compat/modelpool/__init__.py,sha256=eTglIrZdLgUkaiKFP8Pcf3nfOXFfIXBmJzR036fCq68,4664
6
+ fusion_bench/compat/modelpool/__init__.py,sha256=KD8Ddr9D7rJ5YdHEQsTuNmQ0bgQfqF4l3WNMtHmRHD8,4687
7
7
  fusion_bench/compat/modelpool/base_pool.py,sha256=1gxQENvdcOSdHmUbw-x7-X-aXtoSa1Gsys_on1ys8FM,10639
8
8
  fusion_bench/compat/modelpool/huggingface_clip_vision.py,sha256=LyIPgepNOK0qrk_EnBdlTC0ZnEkEZvPUy45cO60TiPU,6918
9
- fusion_bench/compat/taskpool/__init__.py,sha256=fTHd7_7EwSM2K06gUCQZ1jxxhl8T_kP0ouv70wBLhpI,3630
9
+ fusion_bench/compat/taskpool/__init__.py,sha256=LHCRs7vrWMTtMfrqFRMmnNiSZnnZ7tZyVwXZxbi1jvQ,3651
10
10
  fusion_bench/compat/taskpool/base_pool.py,sha256=1AIZBxqUJgshq0Xo3Yo9es4b-8X8ksN1mFHxSOqnDsA,3307
11
11
  fusion_bench/compat/taskpool/clip_image_classification.py,sha256=ZYZsbsE-fPzm6yafA0p-6wcDwVGryLmtXXtuEXeQbTY,7425
12
12
  fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py,sha256=O_WWr6Ivpqm-XbkaDsseDPKYcqp2jezxy-8fOrICFzY,5566
@@ -19,38 +19,43 @@ fusion_bench/dataset/gsm8k.py,sha256=CmANZ0A89PfPwVu_myKhXk1D9IwypOpjH3iqDo1KxcQ
19
19
  fusion_bench/dataset/image_dataset.py,sha256=MSZE_UESyRRQDwnkm2KpyIARUg9SWcwqnH4fDNstzS4,1870
20
20
  fusion_bench/dataset/imdb.py,sha256=YRzeq5z-Fl0aYcC2QtwEBWFkvucvpNo975jwjL5SZvs,260
21
21
  fusion_bench/dataset/nyuv2.py,sha256=2OdIEaY1ywFYMLUxCTpFcIctcBMFTq4nnoOkudSo-jI,3750
22
- fusion_bench/dataset/arc_agi/__init__.py,sha256=JmMAKCk56GKbOOBnfMJtbrkkCVVFPYRhWqW2XvyiHf0,52
23
- fusion_bench/dataset/arc_agi/arc.py,sha256=jhEDJWSbKRjp1vVbKDwIaSBRzXcz6Ir_CiP4mjtiPpA,9221
24
- fusion_bench/dataset/arc_agi/arc_agi.py,sha256=BEYPktvG77zaLnPK7672jck7NiFGpKaRepJyIPm3bYM,7095
22
+ fusion_bench/dataset/arc_agi/__init__.py,sha256=xj8BMG296qPMiL4NYs-ZwqcLJ6yT2wwbubyCbWPe91w,149
23
+ fusion_bench/dataset/arc_agi/arc.py,sha256=EH51Sk9tR7fEMdpeaTUT49_9LtjIO8VMTzSfzk8SQEI,9568
24
+ fusion_bench/dataset/arc_agi/arc_agi.py,sha256=SFOjp0yZrsoln4cQgWU2b-WfI39od6IE1Wof8Ee0888,11768
25
25
  fusion_bench/dataset/arc_agi/augmenters.py,sha256=yhTqyRk0_zamXRQ5Ev10xYc8Dc9D71BTSOkt856x33I,30890
26
26
  fusion_bench/dataset/arc_agi/messagers.py,sha256=E6BqF1iL68ge1m9wOJMSb2Pz6_5i9CR0HxBb7i73plE,53076
27
- fusion_bench/dataset/arc_agi/np_cache.py,sha256=1OoqMEdu9MwiaO086HZPOwfoYmwojFJfSRx9ApP8WgU,5440
28
- fusion_bench/dataset/arc_agi/preprocess.py,sha256=NSuM9ECucPamYd-Ost0voIxR19rBrD0JXLbxzXZr898,6741
27
+ fusion_bench/dataset/arc_agi/np_cache.py,sha256=Ec1DQFtlBdMy-f4dvGEhSr4jyVnBLQELwvX1ztxJKBs,5439
28
+ fusion_bench/dataset/arc_agi/preprocess.py,sha256=lQrXqV4SkhrxREgbqFAop-IwC5qaoxkKosoMO-ZHITY,8509
29
29
  fusion_bench/dataset/arc_agi/representers.py,sha256=-2eTYl-UcFW4zULDjkUrOQYv9P31nttMjc9eTJsaN0g,35852
30
- fusion_bench/dataset/llama/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- fusion_bench/dataset/llama/alpaca.py,sha256=sITFsghX2w0KzLwQ71KRz6rfsI2WLjuuKwt8OetvmCQ,4778
32
- fusion_bench/dataset/llama/collate.py,sha256=exaJAi0EbPTwMf69rgzeaD2IhY4YwtgoWgRfTQtZha0,1717
30
+ fusion_bench/dataset/llama/__init__.py,sha256=p8M7G69L6bga4qLl5lvAO6SKNeUBn99kkJrAQEeOvHw,22
31
+ fusion_bench/dataset/llama/alpaca.py,sha256=0nCQRBZzIPaMzA5VSJAsWw-nE0aVhiAQD5MGJRSrvEQ,7787
32
+ fusion_bench/dataset/llama/collate.py,sha256=fSH-vKKCGCpPT47gchETXLF2yTCMPUE3NTE-inCdczg,3869
33
+ fusion_bench/dataset/llama/metamathqa.py,sha256=z9InmEfWy_wWMbBORumFA2NakEznZWsDWgZzOsXYWhA,1617
33
34
  fusion_bench/dataset/llama/openai.py,sha256=_QXz6ciUTN8u4ILDowZPT3SQTes7ngkFZe1MRLFtVQ8,5012
35
+ fusion_bench/dataset/llama/preference_700k.py,sha256=CqD0ZnM2F2Z3u70tD3VaQ2yPAHkOv75m-eiXiQKIRp0,2582
34
36
  fusion_bench/dataset/llama/sharegpt.py,sha256=8hdh_5BcxIyK0ByZoVLdhd_I06kpHffxQdaC6ezzHkM,5249
35
37
  fusion_bench/dataset/llama/squad.py,sha256=H0L0BHFzVTtkw7jfgTA8gzvZDhzsqfIALq1ip_BVwaM,4810
38
+ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBkl6RJ6ec3Tf6UU,3763
39
+ fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
36
40
  fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
37
- fusion_bench/method/__init__.py,sha256=z_Bx0533GxTdgLd-x4hxdDoF5gND199XFiPthr86Yhw,5585
41
+ fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ fusion_bench/method/__init__.py,sha256=bwYq0wOxiTPCuR1GvLtqQ9Sx91mikfTTJKeSHFsSRn4,5781
38
43
  fusion_bench/method/base_algorithm.py,sha256=5dutGZfPqNhO8F8FOlo3UFR91TZu2Xj7O0pTB40JvWo,1135
39
44
  fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
40
45
  fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
41
46
  fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
42
- fusion_bench/method/simple_average.py,sha256=yzQg-qzldJtfPG0uYLFLQTSpeXc8Q4H88pkztzIdXds,4481
47
+ fusion_bench/method/simple_average.py,sha256=2ghcL1E-eLbIYDCHYCoR9WtiYSb1GvFAH163OTTTEEI,4481
43
48
  fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
44
49
  fusion_bench/method/ada_svd/clip_vision.py,sha256=QrT6cSwgVEGxXEpVhkvKQVQaoRW5P9V52Y3_8NX0f-o,12556
45
- fusion_bench/method/adamerging/__init__.py,sha256=tRQYeKUUIejmKn6YtQ0rtSlok5UQBnmQqbNokgjlNk4,376
50
+ fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
46
51
  fusion_bench/method/adamerging/clip_layer_wise_adamerging.py,sha256=YdQ4trHohW6QzWC2enYvXA44WHxvzmoH_6sMrPn6z60,1305
47
52
  fusion_bench/method/adamerging/clip_task_wise_adamerging.py,sha256=Tys9pDJzz5YNUCO43pO44fGAnizfSaeAwgH4-vVxRN4,6948
48
53
  fusion_bench/method/adamerging/entropy_loss.py,sha256=ZeVe0Hq1PaMfppLqDbB0MOscZUZRNh4CALrvt8pmQC0,736
49
54
  fusion_bench/method/adamerging/flan_t5_layer_wise_adamerging.py,sha256=osc6ueCgiS4u8KUV_sZkHGFBYC8dThnTSp4NB0wkQIg,12915
50
55
  fusion_bench/method/adamerging/gpt2_layer_wise_adamerging.py,sha256=jTGUbhJCV1pcJ5k5jVeAhmtHdbHK5LlEfBhF-86xWjY,13773
51
- fusion_bench/method/adamerging/layer_wise_adamerging.py,sha256=Zi5JW1V10DQNZ_0G9Ny_YH4ATP0mWuO6AjrZRwO-CxU,9432
56
+ fusion_bench/method/adamerging/layer_wise_adamerging.py,sha256=6d1vWuyiAQDh_kLLrZixPyTAxovOjfq-2T2hgLGXCWg,9734
52
57
  fusion_bench/method/adamerging/llama_adamerging.py,sha256=DHm83VaaxxHFaeFY2qbxgO1Ub3Fiqawy4p5AqCkmEp4,13112
53
- fusion_bench/method/adamerging/min_norm_solvers.py,sha256=uolDwgTt8yLFuMcsQbAKO0jT6pbsG1YbSR77S40bKNs,8287
58
+ fusion_bench/method/adamerging/min_norm_solvers.py,sha256=a7n2X0BE_YajlaUygyHV0yqW6-x5dTyZ5V0mt_Q69qE,8291
54
59
  fusion_bench/method/adamerging/task_wise_adamerging.py,sha256=tUy_P4lCn6u5srFCIyMdHs-Hc1MSge4meenK8UA25tw,6006
55
60
  fusion_bench/method/adamerging/utils.py,sha256=Yq8ovlpLJY-5MkSmpoB-_EMYG8cr6eyO-WUZTxKxMTI,432
56
61
  fusion_bench/method/analysis/__init__.py,sha256=EQzOCShS0hF958drq1yg2oSVsS0hvBznPxtTAWB9SGY,122
@@ -77,23 +82,24 @@ fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWET
77
82
  fusion_bench/method/fisher_merging/fisher_merging.py,sha256=CPU-tJiDv9FCIBYl7Pn0zA5cdRB1Md5kWchRDlJgly0,20456
78
83
  fusion_bench/method/fisher_merging/gpt2_fisher_merging.py,sha256=LZmz41jZ5dSsAHxfOUpr3u2rlCgUPTDR7xMsIlQM-jc,7576
79
84
  fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
80
- fusion_bench/method/linear/expo.py,sha256=jTZyI0dtYa4GbchJ78mJuaYrDHD8u3ywxyQvP46hZV8,2681
85
+ fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
81
86
  fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
82
87
  fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
83
88
  fusion_bench/method/linear/simple_average_for_llama.py,sha256=7JlVrmTMmrePvNGnZNoxSuCSq2Vu7cPQzjGC3WWUXBE,2079
84
89
  fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
85
- fusion_bench/method/lm_finetune/__init__.py,sha256=rIkKoxrqKEYkA7XIR6jyhwvUK_ebX2k6Fm1d7K1kU5g,92
90
+ fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
91
+ fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=ys_td1IeL3bzPTE0Cixlj2JooCaB7qseRwSDwroAk5A,18777
86
92
  fusion_bench/method/lm_finetune/causal_lm_pretrain.py,sha256=4CL9KGFsUzrt-edMfTooo4G4apzTH_57rso3DGGvKL0,219
87
- fusion_bench/method/lm_finetune/fullfinetune_sft.py,sha256=J0yhnmM1TXRUC-Mte0a26BAbnHNDLVb7JEq8zrJzvLs,16931
88
- fusion_bench/method/lm_finetune/peftfinetune_sft.py,sha256=sLWlJRwFtSw9vXbbDF_dKW2AbgHK1BNzihK9hPho7ck,17835
93
+ fusion_bench/method/lm_finetune/fullfinetune_sft.py,sha256=eZabmkL7QUHGxl0DX9aaCYerMVUTzH5fXQfJXiSQNEc,16226
94
+ fusion_bench/method/lm_finetune/peftfinetune_sft.py,sha256=klZ_IDr5-1xoYvyVZwug9eyKXyxA3WZuSaML2jCH_Gw,16370
89
95
  fusion_bench/method/mixture_of_experts/__init__.py,sha256=r95iu1-3tgIUP7sWuAbLuqV7xexNYMYPZkM4_8egfp8,198
90
96
  fusion_bench/method/mixture_of_experts/mixtral_merging.py,sha256=-n1CLP1o08VyMSfaTq42kRutbw-cFDSCWHTu0iNh6ok,4237
91
97
  fusion_bench/method/mixture_of_experts/mixtral_upcycling.py,sha256=tQYAeS8MLFEfH3zDFfNZrML7lRnpGLN-HquQvjPtHNw,11208
92
98
  fusion_bench/method/pruning/__init__.py,sha256=3gtmay2bkdIAEGjpAhbY2ztMZOZLKhiJcKV3mCe2H5w,252
93
- fusion_bench/method/pruning/llama_magnitude_prune.py,sha256=ihHa8SNe0WGPuZqRKI_6S6gmH4ooTmeTRARGkJHcsos,6300
94
- fusion_bench/method/pruning/llama_random_prune.py,sha256=c-qV1iFSKZK1dES6gYsgWna1BUn58dtO0NjV1eIfJrg,4566
99
+ fusion_bench/method/pruning/llama_magnitude_prune.py,sha256=40Gmy665S9XqIw027En6E5IlomOIcKECIRje7NDkH00,6300
100
+ fusion_bench/method/pruning/llama_random_prune.py,sha256=EW7zfE-1a5VlPPrQ5xO1k1aqFcpPUfs5eSO_a4M1K90,4566
95
101
  fusion_bench/method/pruning/llama_wanda_prune.py,sha256=8pcg3X1yn8vfhV0lEg1fHP3oTzAc_-ixLmsZRdH5uPo,12070
96
- fusion_bench/method/pruning/magnitude_diff_pruning.py,sha256=vMyhZF_dWLkgB9A1RGpuYugJ6B-estwTvICj5WC904g,6450
102
+ fusion_bench/method/pruning/magnitude_diff_pruning.py,sha256=nXRHW87_Nwiash-udnwR9iOaJMBDo7fPTmAwmSqsAaI,6451
97
103
  fusion_bench/method/pruning/prune_utils.py,sha256=ITWO8WtrhcOYXTcjc_fAAw7cyjvqFa6axawPr3uTT68,5882
98
104
  fusion_bench/method/pruning/wanda_utils/__init__.py,sha256=ujOZ9GUTwzqfVjXUL0e6y_gAEfTQU85rBq2MZ5om7oQ,320
99
105
  fusion_bench/method/pruning/wanda_utils/ablate.py,sha256=TUKsbInQD3UmS8FpuFeco6FeTMaJLZXho9ASWRPcurc,6459
@@ -109,6 +115,9 @@ fusion_bench/method/pwe_moe/module.py,sha256=D4HDx7iDfKX_vJ3vkzi6_atKKlzJT6nH0sr
109
115
  fusion_bench/method/pwe_moe/utils.py,sha256=K9BeVMrhYv7GNlJO76eoQbkI1dOO7XF18yK06WUh9ZA,1336
110
116
  fusion_bench/method/pwe_moe/phn/__init__.py,sha256=PXX-hb_bd7GdtLHcAcnGGsW_Wbg8g2YlRZMTCk3axUw,78
111
117
  fusion_bench/method/pwe_moe/phn/solvers.py,sha256=OO-ImNwsWIQ3eXPxzj1V-kNgXrJc4FKcK-RwaOl_np0,6156
118
+ fusion_bench/method/rankone_moe/__init__.py,sha256=hvYxnloCrzim9s7HUaNA3dcuThEcfrFL5EMw34YNHeE,119
119
+ fusion_bench/method/rankone_moe/clip_rankone_moe.py,sha256=2wnzyHHZSQagZenu9viJ-68MmRG0ppOLR5JHZuT1FKE,5457
120
+ fusion_bench/method/rankone_moe/rankone_moe.py,sha256=YPWneidBJjms2SrYgH5tAim4KBl3Rrcmeq9Xf5QwU58,8489
112
121
  fusion_bench/method/regmean/__init__.py,sha256=VVqAkdHkb005Sc2XmeiedQYzb3q5aQNI8xzEJnE4thg,158
113
122
  fusion_bench/method/regmean/clip_regmean.py,sha256=xhT7dYSCg9sPLL5ZUCCtcA-Ypw4PBHsOivrnz-3fDso,4931
114
123
  fusion_bench/method/regmean/gpt2_regmean.py,sha256=p2D3E8YAZsltsI6GM474UWNqPZfBqihLZ93ZLUpOJ_c,5565
@@ -125,6 +134,8 @@ fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py,sha256=J8iVYks-SQ93dqh6F
125
134
  fusion_bench/method/sparse_we_moe/sparse_we_moe.py,sha256=6OYgj_D_4xTtqy_guA7whQu76LQ7gv-U2cIZkXe7bIg,10479
126
135
  fusion_bench/method/sparselo/__init__.py,sha256=0Uk4Hq5b9iwc5yl2QTDwvBHUItN4V6lwhxDYQrFb724,107
127
136
  fusion_bench/method/sparselo/sparselo.py,sha256=qkfFwovdOA7-NUXtLYiV1iM9bglQJydfuL805azQ6Xc,38806
137
+ fusion_bench/method/surgery/__init__.py,sha256=xVvJvVv9wJnVgJjZDgF2YliuSsy221AQ0KSwB7J7bjo,97
138
+ fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py,sha256=7qoWWYcTWpfXik21mEPBlMW5BM0Kc4nxSW1GL2hzP1o,5599
128
139
  fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
129
140
  fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=1D0uuNtqyA1VS35jh6AnEVsX72HnT02THyerck_lmso,5441
130
141
  fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
@@ -150,33 +161,41 @@ fusion_bench/metrics/text_to_image_generation/__init__.py,sha256=OEIxpKmyy6-3iWy
150
161
  fusion_bench/metrics/text_to_image_generation/aesthetic_scorer.py,sha256=-ZaD84ENPITh_K0Fe9OKYYoiGnPhlSE9gTbBqrtnqqA,4487
151
162
  fusion_bench/metrics/text_to_image_generation/compressibility.py,sha256=x4dNTFnAN4naChBDZBO-jUghnHAyobRVOupctKYRg1w,1656
152
163
  fusion_bench/metrics/text_to_image_generation/pickscore_scorer.py,sha256=aSWzl8k7z80Cirg5qdfkPsp3sMFEv_PjA1NJv3PPWXY,3115
153
- fusion_bench/mixins/__init__.py,sha256=hMxt39JDb_uIvNDtp6ZJEDmaQFwx8GId2VK2Wajw9Rg,791
154
- fusion_bench/mixins/clip_classification.py,sha256=rFF90BPrtkVWF8H1n1du9F2o0i2da9PfC3m0ipGsdus,8201
155
- fusion_bench/mixins/lightning_fabric.py,sha256=LPiBkOpUVltzFXBI6BkROMtYswITJyoALLboZrBItu8,6163
164
+ fusion_bench/mixins/__init__.py,sha256=AsUNvrHdNd6xht7-dfuVipmJuRfMNFlkgG-fn3ojt1U,892
165
+ fusion_bench/mixins/clip_classification.py,sha256=lsrh-qZln1Am0ry_rJL47EFCvVZFRHD2YYk9u3eecs8,9933
166
+ fusion_bench/mixins/fabric_training.py,sha256=ZmycEhCaNCgVi5oM9m0q6msxgk3quowmFvDAcvskFrg,13017
167
+ fusion_bench/mixins/lightning_fabric.py,sha256=XE3OwV68YmJX7aR64uk9h1j9Qs6UPU5F7ciBIgBYyv4,6461
156
168
  fusion_bench/mixins/rich_live.py,sha256=j7wNgrgwfdpl6nCXZGF_2DLtNq2aqCb_52Qhe9QSltc,495
157
169
  fusion_bench/mixins/serialization.py,sha256=9W50JUcM6wgFlaE9H29mATLLVobYniSDxg94FfY25w0,4049
158
170
  fusion_bench/mixins/simple_profiler.py,sha256=UDPB8QAA3rtsSdnVgL9KMthDLBY1Rh4h8mtiquiCPp4,2106
159
171
  fusion_bench/mixins/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
172
  fusion_bench/mixins/optim/adamw_with_warmup.py,sha256=qTnRl8GVVIfaplOFBHnJFuZUbxPZRWRGHGNzm_EDhDE,1421
161
173
  fusion_bench/modelpool/PeftModelForSeq2SeqLM.py,sha256=rxPKTTWno3KAcTTEfydPpXx1b0EJa8PLbqrberweFF8,2108
162
- fusion_bench/modelpool/__init__.py,sha256=Ydu0bLWBA15QfHv3C5Tme0ltwHYTQFm2R-FpBVzgb-o,1266
174
+ fusion_bench/modelpool/__init__.py,sha256=LtcCWTcYkVaz7ZxEseWRwKDk3cSTg95-WQiriXBHLSA,1401
163
175
  fusion_bench/modelpool/base_pool.py,sha256=WCpDt0MpwIk_djpgpM_CANomAEJ7Uoj78GSHEYzD_oU,9142
164
176
  fusion_bench/modelpool/huggingface_automodel.py,sha256=OJ6EyYyjNv1_Bhjn-zli-e__BJ0xVa4Fx9lhXVb-DJo,552
165
177
  fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RPE2dcepeEB3agBKkkH-xA3yMj1czw,2014
166
178
  fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
167
- fusion_bench/modelpool/causal_lm/__init__.py,sha256=kxfDP9q5yTY-SijUu7YafgV56x7c-7LBvApbKJzP3eQ,78
168
- fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=AhuwsJLNiXUHl0Besyq2pzYo6G1_9r-iSuIIZm_70FM,4987
179
+ fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
180
+ fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=k0eOOcFbswVgBYhM9CEXvdCRU9zVC8Gw78QaiMWzeWo,4487
169
181
  fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
170
182
  fusion_bench/modelpool/clip_vision/modelpool.py,sha256=qG-b3ms-q3gqcRf7J6wrTDdmtu2yb1E_A25tNOjSli8,2065
171
183
  fusion_bench/modelpool/seq2seq_lm/__init__.py,sha256=FnfSMHcwNHDQEMdB2HdK4WphQ6MufsRLUkczuALjM4Q,57
172
184
  fusion_bench/modelpool/seq2seq_lm/modelpool.py,sha256=IjLHi8qycWOA4Ul9jnqR48evgVXF_pfTKLPeL9XKP-s,2052
185
+ fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=k-t4RetcDlbkRkPHNuyeV3pQEcJnFRjd9Wp5tFBb-G8,128
186
+ fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
187
+ fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIKAmGJwfrNSuWtxzJ_-ME0gQksEYY2y-jVt7P82Qs0,3434
173
188
  fusion_bench/models/__init__.py,sha256=TNOEH_2yAQP51m9mdWepNEf9VGUZgDthtgXbs4rhb4M,100
174
- fusion_bench/models/hf_clip.py,sha256=yOQ6UKMymQ3GcfpPm26QiToPztij-cXukNMMKXTmUrw,5745
189
+ fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
175
190
  fusion_bench/models/parameter_dict.py,sha256=hRie26WIeXU-wvY6JeGaP8LvpMqbuZA6Ia_1vOFMuu4,2294
191
+ fusion_bench/models/rankone_moe.py,sha256=uwpAqk1cwxxprQ0hxuAwRuPvHDxxBKBDahd9vcaafXs,14248
176
192
  fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
177
193
  fusion_bench/models/sparse_we_moe.py,sha256=b-yIeCsl2rz0i7BP9g_fqCEam7KUNjNX_J8oyZV6MJ8,16509
178
- fusion_bench/models/utils.py,sha256=7HKXRiWHeoNWp8LyDemG2irnMPkT9qg2ExvxjE5mUck,1858
194
+ fusion_bench/models/utils.py,sha256=AQFI2UZSItKfJpG8ex74FPjn_SjsADLhvpv1GYqu43U,2065
179
195
  fusion_bench/models/we_moe.py,sha256=0U-m3mhzb4vFLIzn2jd7j_SQOF9lot4ddzq0l_VPp9g,8424
196
+ fusion_bench/models/chat_templates/__init__.py,sha256=v9vKrCfBgZ3UsMBQatZv1Z-ayPualBl5ciV0aO3p3iY,85
197
+ fusion_bench/models/chat_templates/llama_3_Instruct.py,sha256=E6grNPECr0r1KDPIGW_DmpKQw5-Dh5WbMiTaHWDXwXo,4008
198
+ fusion_bench/models/chat_templates/load_tokenizer.py,sha256=yRs3dB2tZo0Oh-YLJcMZzWSQ5Ps8KXrggZNb5F-aBuM,1400
180
199
  fusion_bench/models/linearized/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
200
  fusion_bench/models/linearized/linearized_model_utils.py,sha256=5yKXReQHIwDttzT_oXwY_iIpaG1zIU0Nv93BWmmOqrg,3212
182
201
  fusion_bench/models/linearized/vision_model.py,sha256=HhbhtyoLD1qVvh1Sgl_beYF2W7AvMevmUy4Jx2XlcsY,4636
@@ -208,15 +227,21 @@ fusion_bench/models/nyuv2/resnet.py,sha256=PcCfBhEsxm7W8cu3epBbIbCYFARPrPTamIa3T
208
227
  fusion_bench/models/nyuv2/resnet_dilated.py,sha256=4EXB6vrBJS307YP6k-TRY1dFJ50LURcTuzqN4tZzYRk,3125
209
228
  fusion_bench/models/smile_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
210
229
  fusion_bench/models/smile_moe/linear.py,sha256=voFvx4Nnfgc6YReBcY9FUGG3WrxVRjyD3odX4jIS5Eg,8724
230
+ fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=zkiV_IF4-7CfzUND--fGCzgz-Sa-dCUz5CmVoPjQt1c,5132
211
231
  fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
232
  fusion_bench/models/wrappers/ensemble.py,sha256=wIMZMRyXw5boWAm96c4Tiyebs_HDQovKxpGQ8rLnHUQ,6308
213
- fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=hxyizABheJds7U_I_0h5yZh2ZvhQlkfycLJ9K5DZQ3c,12210
214
- fusion_bench/models/wrappers/task_wise_fusion.py,sha256=RBs1NL0eFDCMTIQg9kZXrWZ32n1RrKkcAN8OuKdN6Qw,8344
215
- fusion_bench/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
216
- fusion_bench/optim/mezo.py,sha256=WNcJw-Az6wnctc6pqVAloDifqgef31ZI2jwlpMKmlfo,3693
233
+ fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=tISTe__HvlaHVVKkfa0nX3JRRDYLHetJ4BzIqGq-058,12316
234
+ fusion_bench/models/wrappers/task_wise_fusion.py,sha256=gNOU1t1JUcBr3V0Apa1uvJDl5BDM2is85lkEF1SfPRo,8404
235
+ fusion_bench/optim/__init__.py,sha256=lemrcuiA6OLjQkpYm-RP-Ox2MgjngN1ywvCo0NgShlM,61
236
+ fusion_bench/optim/exception.py,sha256=fMgo1heiqfGhuI5RIbf30BwWSShn5RQiyeb30QtfTI0,1607
237
+ fusion_bench/optim/mezo.py,sha256=Vm4vMGh10Fhe28_9L1MK8r_U7DrurA8Liprh2_gn4_U,3646
238
+ fusion_bench/optim/lr_scheduler/__init__.py,sha256=W7CsdW4XKqXbNfzjvv2wmrvNWwfH_sQ-wiBViRPlP3U,29
239
+ fusion_bench/optim/lr_scheduler/linear_warmup.py,sha256=Dvy_TCUuAQHlbDF2jo2_502Ae4JWXGrtZL3gwA_H6ZI,6566
240
+ fusion_bench/optim/lr_scheduler/utils/__init__.py,sha256=GfZk9VYL3cFE1Qy2xQpGc1GCgnjySk5-D7EVRZ-C05Q,29
241
+ fusion_bench/optim/lr_scheduler/utils/visualization.py,sha256=Ea1n9ElNizAe0iUnjynyfteuZunv2-UBMN_NfEU2imA,3490
217
242
  fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31AsmvwvNvJw,508
218
243
  fusion_bench/programs/base_program.py,sha256=0dX_KcMWASo53pr-ldzfUBWIjEXy6oeDWZBrfc7FIk8,195
219
- fusion_bench/programs/fabric_fusion_program.py,sha256=tXUdVfLCVqjUqn8rGnc7BuXCDw0dLOkqqYtq4O6EnX4,12124
244
+ fusion_bench/programs/fabric_fusion_program.py,sha256=Bf4lnntM1J1hxKVm4Av0ohAmSqzDxOzWg75rzqps0qE,12297
220
245
  fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
246
  fusion_bench/scripts/cli.py,sha256=497nhqnJAwxkqU2WCMUqcAIvqTmGRdQaByWGNTX_onY,1131
222
247
  fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
@@ -224,15 +249,17 @@ fusion_bench/scripts/nyuv2_mtl_train.py,sha256=hB_P_4DIT83CGOXoyyaBnh9fYnxTJtvAP
224
249
  fusion_bench/scripts/webui.py,sha256=ryA-2leSnHcYA88tTAYzJGDhiljbi0vl1Fibejzndlw,14398
225
250
  fusion_bench/scripts/clip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
226
251
  fusion_bench/scripts/clip/convert_checkpoint.py,sha256=zncgRAhInFpJDSHIm3GO4F6BzgsdAQVj3LLmV7g-JiQ,1221
227
- fusion_bench/taskpool/__init__.py,sha256=YgWy1iMYBmy2jvejjxHAE6-idaz9NS9qfqE5OFLaC9g,954
252
+ fusion_bench/taskpool/__init__.py,sha256=_qaYgzYnvrJDrZ2DjKXMvOFbelsLrujCKa_gP3UQBBg,1094
228
253
  fusion_bench/taskpool/base_pool.py,sha256=FaP0nndeSsrwbdd9mKa_CedbX9T5AHJmxk7Lc0NEVNY,835
229
254
  fusion_bench/taskpool/dummy.py,sha256=Di9JZO3XyDYn6wAGukrJMTnkS_NaxGTeQYo_3j1JD3Y,1675
230
255
  fusion_bench/taskpool/gpt2_text_classification.py,sha256=S4YyrcJhD4JOgvHF-AVG-gENgVGl-wpQZr1SbiThM04,4886
231
- fusion_bench/taskpool/nyuv2_taskpool.py,sha256=lnaR1oVm0pO2CA9EVV4uk3fiWYHD-F0GzPrUUARD75I,1970
232
- fusion_bench/taskpool/clip_vision/__init__.py,sha256=V_xu4npg1XJV8PV82I4QqLTlNoOTJVqUHTwYt5FS6BE,141
256
+ fusion_bench/taskpool/nyuv2_taskpool.py,sha256=Y-TI-rzh9udCjX3FJ11ZbIG7CGrjDccGc-Ch1Ug6cRY,2059
257
+ fusion_bench/taskpool/clip_vision/__init__.py,sha256=4xGO7rRbRpXF-I34A3WEMU4vydgfdtvXQ57ThaFcpmE,214
258
+ fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py,sha256=JKbRrGaRYztgZ-P0U767HISe40UpDVQ7fn6Tf2rrug0,4891
233
259
  fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py,sha256=hVDTtg-oXqRFmAE2wZPFpk_kvtdk_wS-2-ev2ujEJBs,5390
234
- fusion_bench/taskpool/clip_vision/taskpool.py,sha256=NRFXsp2N8PMQzZgFHy2yfJMjoYbDaxQpPTZ4-4EHPBY,13942
260
+ fusion_bench/taskpool/clip_vision/taskpool.py,sha256=xbJHQXUYd2ZDs-oIyE-3knCsPdiUbZCKN7O86kPwpsQ,14907
235
261
  fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
262
+ fusion_bench/taskpool/llama/reward_model.py,sha256=y5a_dNDzjYSoV9RNRdmDPhALM7i20A5EiFYSjrGTpWo,5028
236
263
  fusion_bench/taskpool/llama/test_generation.py,sha256=kJ_5GruG12FsuJHDh_S7pbQgwEojTqhGpA_wVNH5KPc,6675
237
264
  fusion_bench/tasks/__init__.py,sha256=Z_ePIp4Xizkj78QopLg1dZkJAN_IF73MkbR_nkfHQ9Y,52
238
265
  fusion_bench/tasks/base_task.py,sha256=Fg_pdZhld-2KPKX0C1WrxaTz0EYWrvJerAHO-hA03GI,412
@@ -258,7 +285,7 @@ fusion_bench/tasks/clip_classification/tiny_imagenet.py,sha256=Ar9uQOqUcgGl7MQX9
258
285
  fusion_bench/tasks/flan_t5_text_generation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
259
286
  fusion_bench/tasks/flan_t5_text_generation/datasets_preprocess.py,sha256=zo5S73jm7YDTMN_FxcPNM2dxQkqv2K2siw2xELARPwk,2448
260
287
  fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py,sha256=-B1wqVGp3wZVs0NB4fqoW0u2TvxOpLYzZF1RzppJ5sc,4357
261
- fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py,sha256=B9hLhJBDVilitvwdLkc3bpmIcUuhKlDY6AaQQsZz2R8,1832
288
+ fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py,sha256=sVihXHbqwi8IlDpiIxzvmDv-Ob7WKvi23GIRYbBUKOc,1833
262
289
  fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py,sha256=GhRmGmcJGF4oVgZQarsBtx8GNKrNEZUkrillNz3iBuY,13183
263
290
  fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py,sha256=mKMTXIr5o-BqS_Hvv1bbMvvjQLLeKNVw7BKS9qgQ8Dw,1890
264
291
  fusion_bench/utils/__init__.py,sha256=yFhiBlrdcsJqZe-C5wdlZZ3wpmSN8Tipfpa2-R7CFbc,337
@@ -268,7 +295,7 @@ fusion_bench/utils/data.py,sha256=51nbgOnayyerLBUGHrlm9iilGjhJsBkXKKGXOKgLRW8,61
268
295
  fusion_bench/utils/devices.py,sha256=72HeUVVlVGTt97JA7KFG3D8BM8VHqR-y1nkr9Bm-PRE,7578
269
296
  fusion_bench/utils/dtype.py,sha256=kYoEGqsXitnwOU3W7ivqhQ0OjdI7MGu1VsyMJS4cSyQ,4299
270
297
  fusion_bench/utils/functools.py,sha256=7_tYJ2WD88_2DDuOOj5aZz3cYuslYH5tsVyIgCeLtmk,1318
271
- fusion_bench/utils/hydra_utils.py,sha256=b-5UeDnUbMc2_NAbXVabyHeCwxfhmvAIx_aYUlzVVc8,159
298
+ fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqkoSGk,1174
272
299
  fusion_bench/utils/instantiate.py,sha256=v8L9JDfh2YoEOFpIQIHomvBoqdboZdYIaHEATnD2gdQ,16972
273
300
  fusion_bench/utils/json.py,sha256=iNeZHFvpzbb4oX-52dX15De_dMcux7vQtAUFZqW12GA,1907
274
301
  fusion_bench/utils/lazy_imports.py,sha256=v5l9cpHXPMaz1IVBmB5oOqefYr9vA3XvP340xT7Wy18,2796
@@ -280,17 +307,20 @@ fusion_bench/utils/pylogger.py,sha256=a5tHfpEFbsdzw0vhQxt4BJ6CfTXaxyuwzoDFhyNy4K
280
307
  fusion_bench/utils/rich_utils.py,sha256=V_BjY3o8bXMp-kWfxle4cK48GGHDnKbVonZX65qbXAA,5464
281
308
  fusion_bench/utils/state_dict_arithmetic.py,sha256=dVPBkO8Te9_VANPbetV59ORAQTw7D3css_-d0lYgK4k,9062
282
309
  fusion_bench/utils/timer.py,sha256=RC2hP8JqaibdL0FnRyUCBRf4m7CXyfn5tE16zBWZ7hg,1338
283
- fusion_bench/utils/type.py,sha256=vS3uPx1AJz9ct-hD2Rp142yug8xcQ8YRAciAwENRKME,441
310
+ fusion_bench/utils/type.py,sha256=4CmKDVL1bKZfAfEApq79YhQavG6ZGermpQThDwiVl5w,532
311
+ fusion_bench/utils/plot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
312
+ fusion_bench/utils/plot/token.py,sha256=QGmL_qX8drmWnN_VNLD_0YjKc1o_qahJE-svXVor8dU,1634
313
+ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFwyLeG0MIwOwF4,3739
284
314
  fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
285
315
  fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
286
316
  fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
287
317
  fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
288
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=ZAmS3hcFiWOkKxrlnZXg7moXuRX6lPi5uejqhEwIFV8,1164
289
- fusion_bench_config/fabric_model_fusion.yaml,sha256=68adtP49Kn7Qo9YjGSIe0ACxNFUuZ4FH9inq4U1-lbo,949
318
+ fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=GtK3VuD2FOpFHH_1Hi6tlaYpdLE5Cz0nYKP92Ss9G2Y,1164
319
+ fusion_bench_config/fabric_model_fusion.yaml,sha256=1shmbuC0B9snkFkLErBCiroF-z7UnEHscyEmKBne7Oo,949
320
+ fusion_bench_config/llama_full_finetune.yaml,sha256=J2qCNyX1GaPHg52juSRRIkfHSb6YJdiJOM3Bn3vAo0A,771
290
321
  fusion_bench_config/llama_magnitude_pruning.yaml,sha256=xFyDJpb8gyIjosteOpEW9eayONWhl0B763r1XmO-9w8,633
291
322
  fusion_bench_config/llama_model_fusion.yaml,sha256=EJRsSbt1zttAXAAy_-5NcMkWwhYrl0osjKGXQopu4bo,588
292
- fusion_bench_config/llama_weighted_average.yaml,sha256=SUP6pTcqMF_5lGgvRd_iWgmmD_s4iMGDZBRPfW38HGo,960
293
- fusion_bench_config/nyuv2_config.yaml,sha256=1rW-5ZsJOUCCQAvShdISmmYwyvY5vl3tukPTgtcpocY,410
323
+ fusion_bench_config/nyuv2_config.yaml,sha256=SYaafywjOIKK1f-Nl_K5EuBjXH2oN1whlqjN_dXXO-A,492
294
324
  fusion_bench_config/nyuv2_mtl_train.yaml,sha256=RfsrboIpL9Cct2RkRrKxXAqH4jLi1NECHbwH8iOGtDY,591
295
325
  fusion_bench_config/dataset/image_classification/test/cifar10.yaml,sha256=wlzzTXAKzBjQXKYWhZPGcwydxwgeAV8sM4Dp3GBHbw0,78
296
326
  fusion_bench_config/dataset/image_classification/test/cifar100.yaml,sha256=f_tsvq5bbw2Trp3f6mokXV7hUlfYr_yuebZkEqJqIVI,79
@@ -325,6 +355,8 @@ fusion_bench_config/dataset/image_classification/val/stanford-cars.yaml,sha256=p
325
355
  fusion_bench_config/dataset/image_classification/val/sun397.yaml,sha256=cayl6FNzxOj2UBjw0ikJoQNCdN3DX10xQmcx4ouFP-0,245
326
356
  fusion_bench_config/dataset/image_classification/val/svhn.yaml,sha256=uMdEYmc406i9HdkOLzfzBiJ8pfbYLIWT1pA_UId8HMg,265
327
357
  fusion_bench_config/dataset/image_classification/val/the_eight_tasks.yaml,sha256=x1-xurkOIQtWX-gpSwXDxA5fVY30KmrarS7EKaje33M,101
358
+ fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml,sha256=QJK8OM-C2cZNaC3_vbRucuWrTggw69YWEtPOzmxm6bo,174
359
+ fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml,sha256=uqOGtDu4MNAOnwCA4Qp6elcnc8X_Y7sTrwYdruAvjGU,106
328
360
  fusion_bench_config/dataset/question_answering/search_qa.yaml,sha256=u_8UTKQFnjCwbeXqx2grC2bzLDpdEQy3s3Oxip_JEoc,118
329
361
  fusion_bench_config/dataset/question_answering/test/search_qa.yaml,sha256=oB2qP5ScTTbFGp75a5VLBaQtUa9VYHkGNhNAfe-AkvE,132
330
362
  fusion_bench_config/dataset/question_answering/train/MetaMathQA.yaml,sha256=3-stubxiEKjuBQHhqS_Tc_BqGK3IOwzaAtnO4sd5SX8,90
@@ -343,9 +375,13 @@ fusion_bench_config/dataset/text_generation/train/gsm8k_question_label.yaml,sha2
343
375
  fusion_bench_config/fabric/auto.yaml,sha256=GOCIA6s_co-JpwyBj3dM-dgWUMKciD8lFRmFThUeAsA,575
344
376
  fusion_bench_config/fabric/llama_ddp.yaml,sha256=iP-3n-hehRSjmJkdQWhDb4AkLcfFa6PFq0BCrL58xso,706
345
377
  fusion_bench_config/fabric/llama_fsdp.yaml,sha256=--_G5mcyG6L3aEBNvTjvMH8D-jD0SMXGap6V8E3jH84,575
378
+ fusion_bench_config/fabric/llama_peft_fsdp.yaml,sha256=V-iBtvSg_m2o42ERYRxlDITqeEUBoRTMrLzfVOtN8VU,580
346
379
  fusion_bench_config/fabric/loggers/csv_logger.yaml,sha256=Pv8I-xbxrpTb_fwtDiUtCAEoCZ8QYCLu2GeJNzb3Z3c,373
347
380
  fusion_bench_config/fabric/loggers/tensorboard_logger.yaml,sha256=w9ZP1i8lRYQFslzEM98PmbcFhhn5dXReSJhLOdEi-do,381
381
+ fusion_bench_config/fabric/loggers/wandb_logger.yaml,sha256=eF4slc6QPRuMCMJVeFHNJirsGiB15WQIxNgioXNwezc,142
382
+ fusion_bench_config/fabric/strategy/deepspeed.yaml,sha256=uHujbd2sKrgWb5YhFTWOJCZefStv6O_HHo_GylzqYbU,344
348
383
  fusion_bench_config/fabric/strategy/llama_fsdp.yaml,sha256=WBx05GFUCuEtF-H7LhlTq95VZeaIg36hqntw478qJng,307
384
+ fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml,sha256=xoxeQ0Pp7ecZPcAX57PhQJsqRUKhqEmNc3DXmYXqx4Y,348
349
385
  fusion_bench_config/hydra/default.yaml,sha256=TT0RaUwYgfB7pKpbHgEbmuCVTB2fx2eXxvM-Xz3SQMI,241
350
386
  fusion_bench_config/hydra/help/fusion_bench_help.yaml,sha256=v8s891Cr5wyxBXGDn_VBBwwRmb0JXOL874Sl-zNoCWA,1880
351
387
  fusion_bench_config/hydra/job_logging/rich_logging.yaml,sha256=_dYGeFTCqaPrRowLXBNMXwzYhw8ns1TkQFfALwK1aCw,441
@@ -387,13 +423,15 @@ fusion_bench_config/method/linear/simple_average_for_llama.yaml,sha256=QJR5qx9z4
387
423
  fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml,sha256=N7cyHm6a2QwNsV9uaJp-eZmdbs9kmdRrkxtO58QQQgM,116
388
424
  fusion_bench_config/method/linear/weighted_average.yaml,sha256=SmELszTsJU63e8KwIrPmSqKmOmH-rz42zeumQZHoVDY,187
389
425
  fusion_bench_config/method/linear/weighted_average_for_llama.yaml,sha256=r8BlNqzRfn--_gDSff6KI8FO-elWFIszZDRV7G_nvHw,499
390
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml,sha256=YgiRBeTCQKeMjkxRhABw3teEvGc6X74w43_QVVumcVg,1189
391
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml,sha256=UDwjd4vlQ-LgthHeOzyd3c1HeoY8lD_5F7kefpMXhNI,1471
426
+ fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml,sha256=em0Lnodl9bg8dos9MODMXjKtxWCXwQArjLT2z4TC3Q0,1352
427
+ fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml,sha256=edj3juaYos2I9oQ8J6NKQNcNwqwcQGD74ZMosDsB5SY,1341
428
+ fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml,sha256=9S-qsWUIALRwWd_gzNF1bwIuPPGP1MmqTpdQ53cwZmc,1628
392
429
  fusion_bench_config/method/pruning/llama_magnitude_pruning.yaml,sha256=Px8LU_UtDz-YHDFfqQ7scEPOproiFOaudKVshrhCTgc,483
393
430
  fusion_bench_config/method/pruning/llama_random_pruning.yaml,sha256=0RiZS8d42PXZzwncPG8zcbnyYJ9vtfr2sOSqS8oDyT4,325
394
431
  fusion_bench_config/method/pruning/llama_wanda_pruning.yaml,sha256=qKe5yIRsmK2KUyYENENWlw1qlGet9TpDhR-E_uO7vAw,501
395
432
  fusion_bench_config/method/pruning/magnitude_diff_pruning.yaml,sha256=GsxsQ2L3kfsdD7A8o7UAHfiSbAGh53zVXdlYuEIEWR0,130
396
- fusion_bench_config/method/regmean/clip_regmean.yaml,sha256=svZqwicYpbEx1vZL2IISfQulBNAmTm8X_mAP6JrLCDU,402
433
+ fusion_bench_config/method/rankone_moe/rankone_moe.yaml,sha256=RWf94HqYBinZxH-jhi3h8UOLXxv1P5doy0YcTQM-plw,866
434
+ fusion_bench_config/method/regmean/clip_regmean.yaml,sha256=dxSJMRam6YMks7zYx4ACgvrLP5cndxzraVO93SGhyYo,425
397
435
  fusion_bench_config/method/regmean/gpt2_regmean.yaml,sha256=CL6f3GKQTSiLonrak8uEFoFn6MrzQ-ZJp4zXCwCllSk,423
398
436
  fusion_bench_config/method/regmean/regmean.yaml,sha256=ZgVVLx-lHwVgjtjTl4VZUlthh8yyua87QvoJfmNHud4,101
399
437
  fusion_bench_config/method/slerp/slerp.yaml,sha256=DIsS8xS2CnKLyF5OHz_RWG87A48iElevDbVTUHYobDg,118
@@ -403,6 +441,7 @@ fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml,sha256=G88mabTTn
403
441
  fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml,sha256=A_QFhwAzbzXxDkOPmXRbPTj2TBib66d3_3mkrf-Xu0k,641
404
442
  fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml,sha256=w1OWb38nW08K_hvrRMsCwmRxHWLGQfSSXg5nTiYaP8E,635
405
443
  fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml,sha256=J6vYIwqzh95-B3ekDias3FnCrVr4sig4zxpWyvz8hZ0,613
444
+ fusion_bench_config/method/surgery/adamerging_surgery.yaml,sha256=Ne9JlJFgsRYcygBNCOBSN1ygBcLkE6I-8yusfTxyg-Y,826
406
445
  fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=mK09Ohsvj0Q6suj5qJM4DyCzRy192QBt4wjHS6W29IY,197
407
446
  fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=jiAco7M1XO0aekHFZKLKlXL_jRoCA8bgGD44Z7iB208,1001
408
447
  fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
@@ -492,7 +531,11 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and
492
531
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml,sha256=-Tt_YggxkuIGT4_q5FR16zPvW2wWhGJ5LL8omxvHjvw,380
493
532
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml,sha256=PrDQr04UnhAciDrdtUutx-prMxF1Cs4jrEar7uJ-1Es,238
494
533
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=aSu0QUpcEZXKbL9PAUKCQAVvs5CksG1s7PPCvjTsIzA,234
534
+ fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=BXsYH04vAUPT4Cpr8lS1px-CYYKaCTMRWuHGWgC6qE0,647
535
+ fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=m7NDjkzFbGG8E8e_r2UUxtXjNbMFvtAcZOXyBaJOyX4,645
495
536
  fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=hbjSkVle5zpcqGDSMGaJ20CLoO0ljIXG-gtdONuaFBY,803
537
+ fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml,sha256=OOnKzmsz6iiO2jI5ZyGmCem_Pcs3a25Dveb5PLfwpUM,593
538
+ fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml,sha256=K403avKEQlK4uRhZYHbKGluCG37sMUjLRytBR3LspmI,577
496
539
  fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml,sha256=RXdm5BQmYfq9XWVli0NsQ1Xh7jD61XnhRBOSlmd9FcI,825
497
540
  fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml,sha256=aX0rWwB-p4N94bPX1QGwqKNIWnTrkNMuF7sMAQHzjQE,549
498
541
  fusion_bench_config/modelpool/Seq2SeqLMPool/_template.yaml,sha256=mRx-Xx4s6_IBoJJRogIBW4egmqW0wi1kGVWp_YwYVvQ,233
@@ -501,6 +544,8 @@ fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml,sha256
501
544
  fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml,sha256=GfTY343bt5YtxtUkQxSacrtQav9lT9Y-t1VIL1Chs4k,1726
502
545
  fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml,sha256=2YBIzqYGluOT2r6dOFpUYE4Cbdd2XoHAUps-kCDxVPQ,185
503
546
  fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml,sha256=W1y3fKY9UTTRyv7nqbIO5DESlQVfNsWlhkHJMUYh7B4,1824
547
+ fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml,sha256=JUzGOLANW92Y_rljOOZKmwBQvWrJsko_ziayurzHSTY,880
548
+ fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml,sha256=Cg9W315FzKP3DC_-bkIyYZp1nU6UoSQ6xe-MsMx-P8A,626
504
549
  fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_clean.yaml,sha256=vcU1ygptQ7nlufCEdKDWGMyi-OH4zJs55_vxG-iNHBc,541
505
550
  fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=Ged9KWmmGl29hq0gXzyG1DlryuLebDQAJIb_t5PvqiE,758
506
551
  fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml,sha256=gk_RB12EeYrEUNlZJHtZ3XKIm_LDraqE1hC_lpOEvtY,518
@@ -508,16 +553,18 @@ fusion_bench_config/taskpool/dummy.yaml,sha256=Id4Y_j7oc39qWjjEFG3qLmmMI1fGXXt34
508
553
  fusion_bench_config/taskpool/flan-t5_glue_text_generation.yaml,sha256=3MxfXiiwWJHEVgJ7aViTR7kzOV_YxXLL-fNHtnBaWN4,1002
509
554
  fusion_bench_config/taskpool/gpt-2_glue.yaml,sha256=16bw4-g08pL51M4OWAP08kWZPj6JcEefz4Xc4XhCTLQ,950
510
555
  fusion_bench_config/taskpool/nyuv2_taskpool.yaml,sha256=UaxDpFqEPkEz3h2CjFleUxsmnFnaY1aLXerkud8Zm9s,133
556
+ fusion_bench_config/taskpool/reward_model_evaluation.yaml,sha256=qaUqKcb6E_XfoS5J-FcteMJzFzthNb4aLpB9aKW4jbU,442
511
557
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml,sha256=X70J8HMoIcpKaYGjg7KaaXRvz1tPUbuCqKvK9-kGHrM,1310
512
558
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml,sha256=eoNUaX-cBjpJJt0BYb-ZCNiIlv1SarX9toiGAwHbES0,227
513
559
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml,sha256=AmfMrb2_wXDfRtUDsSCNgbuSicNsfC2vRlwXW-uNeJA,784
514
560
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_L14.yaml,sha256=UYOSR9RJhup6pSC0N7UvvnlpXTkiCdD4tzsx-HyQ_GA,269
515
561
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_val.yaml,sha256=_hqQweyZdCztqvjtuYrhCx4Hdqe959FFCdL7_IspR2w,261
516
562
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml,sha256=9hbvC3k5x6NpA9tRDYeORhrjEyd2VH5ztMdLU67Adjk,249
563
+ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=iQMj2VpDTe_D8OfCo94w5Ud2MON-EGa0DzVr6UmphrA,436
517
564
  fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=i5Bn8bLl2cgqvrgtIGmoovUfSMehk_m-6C2wwcx5JMU,435
518
- fusion_bench-0.2.5.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
519
- fusion_bench-0.2.5.dist-info/METADATA,sha256=Kv69uDo6ROZOarhCQ81ldxjtsp_9oF9nrMMzY1WE4C4,13528
520
- fusion_bench-0.2.5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
521
- fusion_bench-0.2.5.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
522
- fusion_bench-0.2.5.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
523
- fusion_bench-0.2.5.dist-info/RECORD,,
565
+ fusion_bench-0.2.7.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
566
+ fusion_bench-0.2.7.dist-info/METADATA,sha256=XMvOFwwYoq1_J4Fta1kJ2J0grFb4k-I3CPA_ApmjPRM,13528
567
+ fusion_bench-0.2.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
568
+ fusion_bench-0.2.7.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
569
+ fusion_bench-0.2.7.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
570
+ fusion_bench-0.2.7.dist-info/RECORD,,
@@ -11,7 +11,7 @@ _target_: fusion_bench.programs.FabricModelFusionProgram
11
11
  _recursive_: false
12
12
  fast_dev_run: false # Run a single batch of data to test the model or method
13
13
  # Run the script without actually running the experiment, use with `print_config=true`.
14
- # You can also use `--cfg` or `-c` to show the configuration instead of runing.
14
+ # You can also use `--cfg` or `-c` to show the configuration instead of running.
15
15
  dry_run: false
16
16
  print_config: true # Print the configuration to the console
17
17
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
@@ -0,0 +1,6 @@
1
+ alpaca-cleaned:
2
+ _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
3
+ tokenizer: ???
4
+ path: "yahma/alpaca-cleaned"
5
+ split: train
6
+ cache_path: null
@@ -0,0 +1,3 @@
1
+ ultrachat-200k:
2
+ _target_: fusion_bench.dataset.ultrachat.load_tokenized_ultrachat_200k
3
+ tokenizer: ???
@@ -0,0 +1,16 @@
1
+ defaults:
2
+ - loggers: tensorboard_logger
3
+ - strategy: llama_peft_fsdp
4
+ - _self_
5
+
6
+ _target_: lightning.Fabric
7
+ _recursive_: true
8
+ # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.
9
+ # The value applies per node.
10
+ devices: auto
11
+ # The hardware to run on. Possible choices are:
12
+ # ``"cpu"``, ``"cuda"``, ``"mps"``, ``"gpu"``, ``"tpu"``, ``"auto"``.
13
+ # for example: fabric.accelerator=cpu
14
+ accelerator: auto
15
+ # reference to the precision policy: https://lightning.ai/docs/fabric/stable/api/fabric_args.html#precision
16
+ precision: bf16-true
@@ -0,0 +1,2 @@
1
+ # https://lightning.ai/docs/fabric/2.4.0/guide/loggers/wandb.html#weights-and-biases
2
+ _target_: wandb.integration.lightning.fabric.WandbLogger
@@ -0,0 +1,10 @@
1
+ # https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy
2
+ _target_: lightning.fabric.strategies.DeepSpeedStrategy
3
+
4
+ accelerator: null
5
+ zero_optimization: true
6
+ stage: 2
7
+ offload_optimizer: false
8
+ offload_parameters: false
9
+ offload_params_device: "cpu"
10
+ offload_optimizer_device: "cpu"
@@ -0,0 +1,9 @@
1
+ _target_: lightning.fabric.strategies.FSDPStrategy
2
+ sharding_strategy: FULL_SHARD
3
+ state_dict_type: full # Save a single, consolidated checkpoint file
4
+ cpu_offload: false
5
+ auto_wrap_policy:
6
+ _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy
7
+ activation_checkpointing_policy: ${.auto_wrap_policy}
8
+ # limit_all_gathers: true
9
+
@@ -11,7 +11,7 @@ _target_: fusion_bench.programs.FabricModelFusionProgram
11
11
  _recursive_: false
12
12
  fast_dev_run: false # Run a single batch of data to test the model or method
13
13
  # Run the script without actually running the experiment, use with `print_config=true`.
14
- # You can also use `--cfg` or `-c` to show the configuration instead of runing.
14
+ # You can also use `--cfg` or `-c` to show the configuration instead of running.
15
15
  dry_run: false
16
16
  print_config: true # Print the configuration to the console
17
17
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
@@ -0,0 +1,19 @@
1
+ defaults:
2
+ - hydra: default
3
+ - fabric: llama_fsdp
4
+ # --- Model, Method, Task ---
5
+ - method: lm_finetune/fullfinetune_sft.yaml
6
+ - modelpool: CausalLMPool/llama_alpaca_cleaned.yaml
7
+ - taskpool: dummy
8
+ - _self_
9
+
10
+ _target_: fusion_bench.programs.FabricModelFusionProgram
11
+ _recursive_: false
12
+
13
+ fast_dev_run: false # Run a single batch of data to test the model or method
14
+ # Run the script without actually running the experiment, use with `print_config=true`.
15
+ # You can also use `--cfg` or `-c` to show the configuration instead of running.
16
+ dry_run: false
17
+ print_config: true # Print the configuration to the console
18
+ report_save_path: null # path to save the result report
19
+ print_function_call: true # set to false if you don't want to print the details of instantiate calls
@@ -0,0 +1,47 @@
1
+ _target_: fusion_bench.method.BradleyTerryRewardModeling
2
+ _recursive_: False
3
+
4
+ optimizer:
5
+ _target_: torch.optim.AdamW
6
+ lr: 1e-5
7
+ weight_decay: 0.01
8
+ fused: null
9
+
10
+ lr_scheduler:
11
+ _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
12
+ T_max: _T_max_ # this will be replaced by the expected number of training steps
13
+ init_lr: 0
14
+ warmup_steps: 100
15
+ max_lr: ${..optimizer.lr}
16
+ min_lr: 1e-6
17
+
18
+ dataloader_kwargs:
19
+ # per-gpu batch size
20
+ batch_size: 1
21
+ num_workers: 0
22
+ pin_memory: True
23
+
24
+ # Training hyperparameters
25
+ # if max_epochs=-1, max_steps will be used to determine the number of training steps
26
+ max_epochs: 3
27
+ max_steps: -1
28
+ max_steps_per_epoch: -1
29
+ accumulate_grad_batches: 1
30
+ lr_scheduler_interval: step
31
+ lr_scheduler_frequency: 1
32
+ # Checkpointing may be done by epoch or step, and at the end of training
33
+ # `checkpoint_save_interval` can be 'epoch' or 'step'
34
+ checkpoint_save_interval: epoch
35
+ checkpoint_save_frequency: 1
36
+ # Whether to use gradient clipping, and if so, the value and algorithm
37
+ gradient_clip_val: null
38
+ gradient_clip_algorithm: norm
39
+ save_optimizer_state: false
40
+ # save_full_model must be true when using sharded FSDP
41
+ save_full_model: true
42
+ # save_ckpt_type can be 'hf' or 'lightning'
43
+ save_ckpt_type: lightning
44
+ # Path to checkpoint to load from, used for resuming training
45
+ ckpt_path: null
46
+ max_length: 4096
47
+ fix_token_embedding: true
@@ -1,16 +1,19 @@
1
- _target_: ttt.method.FullFinetuneSFT
1
+ _target_: fusion_bench.method.FullFinetuneSFT
2
2
  _recursive_: False
3
3
 
4
4
  optimizer:
5
5
  _target_: torch.optim.AdamW
6
- fused: True
6
+ lr: 1e-5
7
7
  weight_decay: 0.01
8
- lr: 5e-5
8
+ fused: null
9
9
 
10
10
  lr_scheduler:
11
- _target_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
12
- num_warmup_steps: 5
13
- num_training_steps: _T_max_ # this will be replaced by the expected number of training steps
11
+ _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
12
+ T_max: _T_max_ # this will be replaced by the expected number of training steps
13
+ init_lr: 0
14
+ warmup_steps: 100
15
+ max_lr: ${..optimizer.lr}
16
+ min_lr: 1e-6
14
17
 
15
18
  dataloader_kwargs:
16
19
  # per-gpu batch size
@@ -36,5 +39,9 @@ gradient_clip_algorithm: norm
36
39
  save_optimizer_state: false
37
40
  # save_full_model must be true when using sharded FSDP
38
41
  save_full_model: true
42
+ # save_ckpt_type can be 'hf' or 'lightning'
43
+ save_ckpt_type: lightning
39
44
  # Path to checkpoint to load from, used for resuming training
40
45
  ckpt_path: null
46
+ max_length: 4096
47
+ fix_token_embedding: true
@@ -1,16 +1,16 @@
1
- _target_: ttt.method.FullFinetuneSFT
1
+ _target_: fusion_bench.method.PeftFinetuneSFT
2
2
  _recursive_: False
3
3
 
4
4
  optimizer:
5
5
  _target_: torch.optim.AdamW
6
- fused: True
6
+ lr: 1e-4
7
7
  weight_decay: 0.01
8
- lr: 5e-5
8
+ fused: null
9
9
 
10
10
  lr_scheduler:
11
- _target_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
12
- num_warmup_steps: 5
13
- num_training_steps: _T_max_ # this will be replaced by the expected number of training steps
11
+ _target_: torch.optim.lr_scheduler.CosineAnnealingLR
12
+ T_max: _T_max_ # this will be replaced by the expected number of training steps
13
+ eta_min: 1e-6
14
14
 
15
15
  dataloader_kwargs:
16
16
  # per-gpu batch size
@@ -22,9 +22,14 @@ peft_config:
22
22
  _target_: peft.LoraConfig
23
23
  task_type: peft.TaskType.CAUSAL_LM
24
24
  target_modules:
25
- - query
26
- - value
27
- r: 16
25
+ # lora attention modules
26
+ - q_proj
27
+ - v_proj
28
+ # lora mlp modules
29
+ - gate_proj
30
+ - down_proj
31
+ - up_proj
32
+ r: 64
28
33
  lora_alpha: 16
29
34
  lora_dropout: 0
30
35
  bias: none
@@ -51,5 +56,8 @@ gradient_clip_algorithm: norm
51
56
  save_optimizer_state: false
52
57
  # save_full_model must be true when using sharded FSDP
53
58
  save_full_model: false
59
+ # save_ckpt_type can be 'peft' or 'lightning'
60
+ save_ckpt_type: lightning
54
61
  # Path to checkpoint to load from, used for resuming training
55
62
  ckpt_path: null
63
+ max_length: 4096