fusion-bench 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench-0.2.9/LICENSE +21 -0
- fusion_bench-0.2.9/PKG-INFO +258 -0
- fusion_bench-0.2.9/README.md +210 -0
- fusion_bench-0.2.9/fusion_bench/__init__.py +20 -0
- fusion_bench-0.2.9/fusion_bench/__main__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/compat/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/compat/method/__init__.py +109 -0
- fusion_bench-0.2.9/fusion_bench/compat/method/base_algorithm.py +58 -0
- fusion_bench-0.2.9/fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py +34 -0
- fusion_bench-0.2.9/fusion_bench/compat/modelpool/__init__.py +116 -0
- fusion_bench-0.2.9/fusion_bench/compat/modelpool/base_pool.py +328 -0
- fusion_bench-0.2.9/fusion_bench/compat/modelpool/huggingface_clip_vision.py +178 -0
- fusion_bench-0.2.9/fusion_bench/compat/taskpool/__init__.py +95 -0
- fusion_bench-0.2.9/fusion_bench/compat/taskpool/base_pool.py +111 -0
- fusion_bench-0.2.9/fusion_bench/compat/taskpool/clip_image_classification.py +210 -0
- fusion_bench-0.2.9/fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +175 -0
- fusion_bench-0.2.9/fusion_bench/constants/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/constants/paths.py +18 -0
- fusion_bench-0.2.9/fusion_bench/dataset/__init__.py +29 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/__init__.py +6 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/arc.py +308 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/arc_agi.py +365 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/augmenters.py +1036 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/messagers.py +1355 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/np_cache.py +168 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/preprocess.py +298 -0
- fusion_bench-0.2.9/fusion_bench/dataset/arc_agi/representers.py +1019 -0
- fusion_bench-0.2.9/fusion_bench/dataset/clip_dataset.py +71 -0
- fusion_bench-0.2.9/fusion_bench/dataset/fer2013.py +12 -0
- fusion_bench-0.2.9/fusion_bench/dataset/gpt2_glue.py +300 -0
- fusion_bench-0.2.9/fusion_bench/dataset/gsm8k.py +60 -0
- fusion_bench-0.2.9/fusion_bench/dataset/image_dataset.py +55 -0
- fusion_bench-0.2.9/fusion_bench/dataset/imdb.py +11 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/alpaca.py +232 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/collate.py +120 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/metamathqa.py +50 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/openai.py +160 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/preference_700k.py +70 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/sharegpt.py +141 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/squad.py +125 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/stanford_shp.py +90 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/ultrachat.py +58 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/utils/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/dataset/llama/wikitext.py +89 -0
- fusion_bench-0.2.9/fusion_bench/dataset/nyuv2.py +119 -0
- fusion_bench-0.2.9/fusion_bench/method/__init__.py +177 -0
- fusion_bench-0.2.9/fusion_bench/method/ada_svd/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/ada_svd/clip_vision.py +319 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/__init__.py +6 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +46 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/clip_task_wise_adamerging.py +187 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/entropy_loss.py +25 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/flan_t5_layer_wise_adamerging.py +332 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/gpt2_layer_wise_adamerging.py +351 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/layer_wise_adamerging.py +252 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/llama_adamerging.py +335 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/min_norm_solvers.py +227 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/task_wise_adamerging.py +174 -0
- fusion_bench-0.2.9/fusion_bench/method/adamerging/utils.py +15 -0
- fusion_bench-0.2.9/fusion_bench/method/analysis/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/analysis/task_vector_cos_similarity.py +172 -0
- fusion_bench-0.2.9/fusion_bench/method/analysis/task_vector_violin_plot.py +205 -0
- fusion_bench-0.2.9/fusion_bench/method/base_algorithm.py +44 -0
- fusion_bench-0.2.9/fusion_bench/method/classification/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/method/classification/clip_finetune.py +444 -0
- fusion_bench-0.2.9/fusion_bench/method/classification/continual_clip_finetune.py +297 -0
- fusion_bench-0.2.9/fusion_bench/method/concrete_subspace/__init__.py +6 -0
- fusion_bench-0.2.9/fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +595 -0
- fusion_bench-0.2.9/fusion_bench/method/concrete_subspace/clip_concrete_task_arithmetic.py +263 -0
- fusion_bench-0.2.9/fusion_bench/method/dare/__init__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/method/dare/simple_average.py +31 -0
- fusion_bench-0.2.9/fusion_bench/method/dare/task_arithmetic.py +82 -0
- fusion_bench-0.2.9/fusion_bench/method/dare/ties_merging.py +100 -0
- fusion_bench-0.2.9/fusion_bench/method/dare/utils.py +87 -0
- fusion_bench-0.2.9/fusion_bench/method/dawe/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/dawe/dawe_for_clip.py +274 -0
- fusion_bench-0.2.9/fusion_bench/method/dawe/warppers/__init__.py +13 -0
- fusion_bench-0.2.9/fusion_bench/method/dawe/warppers/dawe_model.py +256 -0
- fusion_bench-0.2.9/fusion_bench/method/depth_upscaling/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/method/depth_upscaling/depth_upscaling.py +89 -0
- fusion_bench-0.2.9/fusion_bench/method/depth_upscaling/depth_upscaling_for_llama.py +57 -0
- fusion_bench-0.2.9/fusion_bench/method/dummy.py +35 -0
- fusion_bench-0.2.9/fusion_bench/method/ensemble.py +98 -0
- fusion_bench-0.2.9/fusion_bench/method/fisher_merging/__init__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/method/fisher_merging/clip_fisher_merging.py +191 -0
- fusion_bench-0.2.9/fusion_bench/method/fisher_merging/fisher_merging.py +484 -0
- fusion_bench-0.2.9/fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +193 -0
- fusion_bench-0.2.9/fusion_bench/method/linear/__init__.py +6 -0
- fusion_bench-0.2.9/fusion_bench/method/linear/expo.py +118 -0
- fusion_bench-0.2.9/fusion_bench/method/linear/linear_interpolation.py +60 -0
- fusion_bench-0.2.9/fusion_bench/method/linear/llama_expo.py +229 -0
- fusion_bench-0.2.9/fusion_bench/method/linear/simple_average_for_llama.py +54 -0
- fusion_bench-0.2.9/fusion_bench/method/linear/task_arithmetic_for_llama.py +57 -0
- fusion_bench-0.2.9/fusion_bench/method/lm_finetune/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/method/lm_finetune/bradley_terry_rm.py +432 -0
- fusion_bench-0.2.9/fusion_bench/method/lm_finetune/causal_lm_pretrain.py +7 -0
- fusion_bench-0.2.9/fusion_bench/method/lm_finetune/fullfinetune_sft.py +375 -0
- fusion_bench-0.2.9/fusion_bench/method/lm_finetune/peftfinetune_sft.py +370 -0
- fusion_bench-0.2.9/fusion_bench/method/mixture_of_experts/__init__.py +7 -0
- fusion_bench-0.2.9/fusion_bench/method/mixture_of_experts/mixtral_merging.py +112 -0
- fusion_bench-0.2.9/fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +329 -0
- fusion_bench-0.2.9/fusion_bench/method/model_recombination.py +121 -0
- fusion_bench-0.2.9/fusion_bench/method/opcm/__init__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/method/opcm/opcm.py +277 -0
- fusion_bench-0.2.9/fusion_bench/method/opcm/task_arithmetic.py +115 -0
- fusion_bench-0.2.9/fusion_bench/method/opcm/ties_merging.py +156 -0
- fusion_bench-0.2.9/fusion_bench/method/opcm/utils.py +73 -0
- fusion_bench-0.2.9/fusion_bench/method/opcm/weight_average.py +120 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/__init__.py +5 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/llama_magnitude_prune.py +202 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/llama_random_prune.py +143 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/llama_wanda_prune.py +359 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/magnitude_diff_pruning.py +180 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/prune_utils.py +165 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/__init__.py +7 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/ablate.py +188 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/data.py +135 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/eval.py +245 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/layerwrapper.py +61 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/prune.py +581 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/prune_opt.py +539 -0
- fusion_bench-0.2.9/fusion_bench/method/pruning/wanda_utils/sparsegpt.py +165 -0
- fusion_bench-0.2.9/fusion_bench/method/pwe_moe/__init__.py +5 -0
- fusion_bench-0.2.9/fusion_bench/method/pwe_moe/clip_pwe_moe.py +315 -0
- fusion_bench-0.2.9/fusion_bench/method/pwe_moe/module.py +316 -0
- fusion_bench-0.2.9/fusion_bench/method/pwe_moe/phn/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/pwe_moe/phn/solvers.py +195 -0
- fusion_bench-0.2.9/fusion_bench/method/pwe_moe/utils.py +43 -0
- fusion_bench-0.2.9/fusion_bench/method/rankone_moe/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/method/rankone_moe/clip_rankone_moe.py +160 -0
- fusion_bench-0.2.9/fusion_bench/method/rankone_moe/rankone_moe.py +249 -0
- fusion_bench-0.2.9/fusion_bench/method/regmean/__init__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/method/regmean/clip_regmean.py +131 -0
- fusion_bench-0.2.9/fusion_bench/method/regmean/gpt2_regmean.py +147 -0
- fusion_bench-0.2.9/fusion_bench/method/regmean/regmean.py +375 -0
- fusion_bench-0.2.9/fusion_bench/method/simple_average.py +112 -0
- fusion_bench-0.2.9/fusion_bench/method/slerp/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/slerp/slerp.py +101 -0
- fusion_bench-0.2.9/fusion_bench/method/slerp/slerp_utils.py +107 -0
- fusion_bench-0.2.9/fusion_bench/method/smile_upscaling/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/method/smile_upscaling/singular_projection_merging.py +198 -0
- fusion_bench-0.2.9/fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +331 -0
- fusion_bench-0.2.9/fusion_bench/method/smile_upscaling/smile_upscaling.py +573 -0
- fusion_bench-0.2.9/fusion_bench/method/sparse_we_moe/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py +248 -0
- fusion_bench-0.2.9/fusion_bench/method/sparse_we_moe/sparse_we_moe.py +301 -0
- fusion_bench-0.2.9/fusion_bench/method/sparselo/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/sparselo/sparselo.py +955 -0
- fusion_bench-0.2.9/fusion_bench/method/surgery/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +157 -0
- fusion_bench-0.2.9/fusion_bench/method/tall_mask/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/method/tall_mask/utils.py +234 -0
- fusion_bench-0.2.9/fusion_bench/method/task_arithmetic/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/task_arithmetic/task_arithmetic.py +151 -0
- fusion_bench-0.2.9/fusion_bench/method/task_singular_vector/TSVC.py +16 -0
- fusion_bench-0.2.9/fusion_bench/method/task_singular_vector/TSVM.py +63 -0
- fusion_bench-0.2.9/fusion_bench/method/task_singular_vector/__init__.py +9 -0
- fusion_bench-0.2.9/fusion_bench/method/task_singular_vector/utils/TSVC_utils.py +50 -0
- fusion_bench-0.2.9/fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +640 -0
- fusion_bench-0.2.9/fusion_bench/method/task_singular_vector/utils/__init__.py +7 -0
- fusion_bench-0.2.9/fusion_bench/method/ties_merging/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/ties_merging/ties_merging.py +117 -0
- fusion_bench-0.2.9/fusion_bench/method/ties_merging/ties_merging_utils.py +331 -0
- fusion_bench-0.2.9/fusion_bench/method/trust_region/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/trust_region/clip_task_arithmetic.py +205 -0
- fusion_bench-0.2.9/fusion_bench/method/trust_region/utils.py +58 -0
- fusion_bench-0.2.9/fusion_bench/method/we_moe/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/method/we_moe/clip_we_moe.py +161 -0
- fusion_bench-0.2.9/fusion_bench/method/we_moe/we_moe.py +247 -0
- fusion_bench-0.2.9/fusion_bench/method/weighted_average/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/method/weighted_average/llama.py +113 -0
- fusion_bench-0.2.9/fusion_bench/method/weighted_average/weighted_average.py +102 -0
- fusion_bench-0.2.9/fusion_bench/metrics/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/metrics/continual_learning/backward_transfer.py +22 -0
- fusion_bench-0.2.9/fusion_bench/metrics/nyuv2/__init__.py +11 -0
- fusion_bench-0.2.9/fusion_bench/metrics/nyuv2/depth.py +45 -0
- fusion_bench-0.2.9/fusion_bench/metrics/nyuv2/loss.py +31 -0
- fusion_bench-0.2.9/fusion_bench/metrics/nyuv2/noise.py +16 -0
- fusion_bench-0.2.9/fusion_bench/metrics/nyuv2/normal.py +48 -0
- fusion_bench-0.2.9/fusion_bench/metrics/nyuv2/segmentation.py +43 -0
- fusion_bench-0.2.9/fusion_bench/metrics/text_to_image_generation/__init__.py +9 -0
- fusion_bench-0.2.9/fusion_bench/metrics/text_to_image_generation/aesthetic_scorer.py +123 -0
- fusion_bench-0.2.9/fusion_bench/metrics/text_to_image_generation/compressibility.py +49 -0
- fusion_bench-0.2.9/fusion_bench/metrics/text_to_image_generation/pickscore_scorer.py +95 -0
- fusion_bench-0.2.9/fusion_bench/mixins/__init__.py +28 -0
- fusion_bench-0.2.9/fusion_bench/mixins/clip_classification.py +252 -0
- fusion_bench-0.2.9/fusion_bench/mixins/fabric_training.py +320 -0
- fusion_bench-0.2.9/fusion_bench/mixins/lightning_fabric.py +174 -0
- fusion_bench-0.2.9/fusion_bench/mixins/optim/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/mixins/optim/adamw_with_warmup.py +42 -0
- fusion_bench-0.2.9/fusion_bench/mixins/rich_live.py +21 -0
- fusion_bench-0.2.9/fusion_bench/mixins/serialization.py +132 -0
- fusion_bench-0.2.9/fusion_bench/mixins/simple_profiler.py +79 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/PeftModelForSeq2SeqLM.py +49 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/__init__.py +42 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/base_pool.py +268 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/causal_lm/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/causal_lm/causal_lm.py +139 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/clip_vision/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/clip_vision/modelpool.py +145 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/huggingface_automodel.py +20 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/huggingface_gpt2_classification.py +63 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/nyuv2_modelpool.py +40 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/seq2seq_lm/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/seq2seq_lm/modelpool.py +65 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/seq_classification_lm/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/seq_classification_lm/reward_model.py +15 -0
- fusion_bench-0.2.9/fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +98 -0
- fusion_bench-0.2.9/fusion_bench/models/__init__.py +3 -0
- fusion_bench-0.2.9/fusion_bench/models/chat_templates/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/models/chat_templates/llama_3_Instruct.py +1 -0
- fusion_bench-0.2.9/fusion_bench/models/chat_templates/load_tokenizer.py +43 -0
- fusion_bench-0.2.9/fusion_bench/models/hf_clip.py +199 -0
- fusion_bench-0.2.9/fusion_bench/models/linearized/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/models/linearized/linearized_model_utils.py +91 -0
- fusion_bench-0.2.9/fusion_bench/models/linearized/vision_model.py +122 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/__init__.py +16 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/model_utils/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/model_utils/embedding.py +87 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/model_utils/liger_kernel.py +86 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/model_utils/misc.py +112 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/model_utils/mod.py +52 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/model_utils/visual.py +241 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/patcher.py +78 -0
- fusion_bench-0.2.9/fusion_bench/models/llama/tokenizer_loader.py +153 -0
- fusion_bench-0.2.9/fusion_bench/models/masks/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/models/masks/mask_model.py +160 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_losparse_llama/__init__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_losparse_llama/configuration_losparse_llama.py +205 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_losparse_llama/losparse_linear.py +67 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_losparse_llama/modeling_losparse_llama.py +1825 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_losparse_llama/register.py +8 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_losparse_llama/utils.py +60 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_smile_mistral/__init__.py +48 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_smile_mistral/configuration_smile_mistral.py +21 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +1034 -0
- fusion_bench-0.2.9/fusion_bench/models/modeling_smile_mistral/register.py +8 -0
- fusion_bench-0.2.9/fusion_bench/models/nyuv2/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/models/nyuv2/aspp.py +82 -0
- fusion_bench-0.2.9/fusion_bench/models/nyuv2/lightning_module.py +176 -0
- fusion_bench-0.2.9/fusion_bench/models/nyuv2/resnet.py +405 -0
- fusion_bench-0.2.9/fusion_bench/models/nyuv2/resnet_dilated.py +99 -0
- fusion_bench-0.2.9/fusion_bench/models/parameter_dict.py +75 -0
- fusion_bench-0.2.9/fusion_bench/models/rankone_moe.py +410 -0
- fusion_bench-0.2.9/fusion_bench/models/separate_io.py +105 -0
- fusion_bench-0.2.9/fusion_bench/models/smile_moe/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/models/smile_moe/linear.py +256 -0
- fusion_bench-0.2.9/fusion_bench/models/sparse_we_moe.py +459 -0
- fusion_bench-0.2.9/fusion_bench/models/surgery/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/models/surgery/surgerymodelwrapper.py +158 -0
- fusion_bench-0.2.9/fusion_bench/models/utils.py +80 -0
- fusion_bench-0.2.9/fusion_bench/models/we_moe.py +247 -0
- fusion_bench-0.2.9/fusion_bench/models/wrappers/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/models/wrappers/ensemble.py +183 -0
- fusion_bench-0.2.9/fusion_bench/models/wrappers/layer_wise_fusion.py +336 -0
- fusion_bench-0.2.9/fusion_bench/models/wrappers/task_wise_fusion.py +249 -0
- fusion_bench-0.2.9/fusion_bench/optim/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/optim/exception.py +47 -0
- fusion_bench-0.2.9/fusion_bench/optim/lr_scheduler/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/optim/lr_scheduler/linear_warmup.py +222 -0
- fusion_bench-0.2.9/fusion_bench/optim/lr_scheduler/utils/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/optim/lr_scheduler/utils/visualization.py +119 -0
- fusion_bench-0.2.9/fusion_bench/optim/mezo.py +118 -0
- fusion_bench-0.2.9/fusion_bench/programs/__init__.py +20 -0
- fusion_bench-0.2.9/fusion_bench/programs/base_program.py +9 -0
- fusion_bench-0.2.9/fusion_bench/programs/fabric_fusion_program.py +299 -0
- fusion_bench-0.2.9/fusion_bench/scripts/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/scripts/cli.py +43 -0
- fusion_bench-0.2.9/fusion_bench/scripts/clip/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/scripts/clip/convert_checkpoint.py +39 -0
- fusion_bench-0.2.9/fusion_bench/scripts/imgui.py +218 -0
- fusion_bench-0.2.9/fusion_bench/scripts/nyuv2_mtl_train.py +137 -0
- fusion_bench-0.2.9/fusion_bench/scripts/webui.py +405 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/__init__.py +39 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/base_pool.py +35 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/clip_vision/__init__.py +4 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +112 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +120 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/clip_vision/taskpool.py +392 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/dummy.py +58 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/gpt2_text_classification.py +149 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/llama/__init__.py +1 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/llama/reward_model.py +157 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/llama/test_generation.py +185 -0
- fusion_bench-0.2.9/fusion_bench/taskpool/nyuv2_taskpool.py +65 -0
- fusion_bench-0.2.9/fusion_bench/tasks/__init__.py +2 -0
- fusion_bench-0.2.9/fusion_bench/tasks/base_task.py +18 -0
- fusion_bench-0.2.9/fusion_bench/tasks/classification.py +75 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/__init__.py +183 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/cifar10.py +33 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/cifar100.py +146 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/clip_dataset.py +1 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/cub_200_2011.py +208 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/dtd.py +60 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/emnist_letters.py +31 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/emnist_mnist.py +5 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/eurosat.py +18 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/fashion_mnist.py +18 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/fer2013.py +18 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/flower102.py +106 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/food101.py +105 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/gtsrb.py +51 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/imagenet.py +2103 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/kmnist.py +17 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/mnist.py +5 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/mongo_leaf_disease.py +19 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/oxford_iiit_pet.py +41 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/pcam.py +5 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/rendered_sst2.py +3 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/resisc45.py +68 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/stanford_cars.py +209 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/stl10.py +17 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/sun397.py +404 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/svhn.py +5 -0
- fusion_bench-0.2.9/fusion_bench/tasks/clip_classification/tiny_imagenet.py +208 -0
- fusion_bench-0.2.9/fusion_bench/tasks/flan_t5_text_generation/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/tasks/flan_t5_text_generation/datasets_preprocess.py +71 -0
- fusion_bench-0.2.9/fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py +132 -0
- fusion_bench-0.2.9/fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +64 -0
- fusion_bench-0.2.9/fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py +379 -0
- fusion_bench-0.2.9/fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py +52 -0
- fusion_bench-0.2.9/fusion_bench/utils/__init__.py +14 -0
- fusion_bench-0.2.9/fusion_bench/utils/auto.py +31 -0
- fusion_bench-0.2.9/fusion_bench/utils/cache_utils.py +58 -0
- fusion_bench-0.2.9/fusion_bench/utils/data.py +165 -0
- fusion_bench-0.2.9/fusion_bench/utils/devices.py +231 -0
- fusion_bench-0.2.9/fusion_bench/utils/dict.py +43 -0
- fusion_bench-0.2.9/fusion_bench/utils/dtype.py +146 -0
- fusion_bench-0.2.9/fusion_bench/utils/expr.py +90 -0
- fusion_bench-0.2.9/fusion_bench/utils/fabric.py +17 -0
- fusion_bench-0.2.9/fusion_bench/utils/functools.py +37 -0
- fusion_bench-0.2.9/fusion_bench/utils/hydra_utils.py +28 -0
- fusion_bench-0.2.9/fusion_bench/utils/instantiate.py +450 -0
- fusion_bench-0.2.9/fusion_bench/utils/json.py +93 -0
- fusion_bench-0.2.9/fusion_bench/utils/lazy_imports.py +74 -0
- fusion_bench-0.2.9/fusion_bench/utils/misc.py +18 -0
- fusion_bench-0.2.9/fusion_bench/utils/packages.py +84 -0
- fusion_bench-0.2.9/fusion_bench/utils/parameters.py +323 -0
- fusion_bench-0.2.9/fusion_bench/utils/path.py +22 -0
- fusion_bench-0.2.9/fusion_bench/utils/plot/__init__.py +0 -0
- fusion_bench-0.2.9/fusion_bench/utils/plot/color_data.py +1726 -0
- fusion_bench-0.2.9/fusion_bench/utils/plot/token.py +52 -0
- fusion_bench-0.2.9/fusion_bench/utils/plot/token_notebook.py +127 -0
- fusion_bench-0.2.9/fusion_bench/utils/pylogger.py +55 -0
- fusion_bench-0.2.9/fusion_bench/utils/rich_utils.py +201 -0
- fusion_bench-0.2.9/fusion_bench/utils/set.py +8 -0
- fusion_bench-0.2.9/fusion_bench/utils/state_dict_arithmetic.py +297 -0
- fusion_bench-0.2.9/fusion_bench/utils/strenum/__init__.py +326 -0
- fusion_bench-0.2.9/fusion_bench/utils/strenum/_name_mangler.py +127 -0
- fusion_bench-0.2.9/fusion_bench/utils/strenum/_version.py +556 -0
- fusion_bench-0.2.9/fusion_bench/utils/tensorboard.py +51 -0
- fusion_bench-0.2.9/fusion_bench/utils/timer.py +49 -0
- fusion_bench-0.2.9/fusion_bench/utils/type.py +34 -0
- fusion_bench-0.2.9/fusion_bench.egg-info/PKG-INFO +258 -0
- fusion_bench-0.2.9/fusion_bench.egg-info/SOURCES.txt +733 -0
- fusion_bench-0.2.9/fusion_bench.egg-info/dependency_links.txt +1 -0
- fusion_bench-0.2.9/fusion_bench.egg-info/entry_points.txt +3 -0
- fusion_bench-0.2.9/fusion_bench.egg-info/requires.txt +13 -0
- fusion_bench-0.2.9/fusion_bench.egg-info/top_level.txt +1 -0
- fusion_bench-0.2.9/fusion_bench_config/README.md +12 -0
- fusion_bench-0.2.9/fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +23 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/README.md +6 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/TALL14.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/TALL20.yaml +28 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/cifar10.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/cifar100.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/cub-200-2011.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/dtd.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/emnist_mnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/eurosat.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/fer2013.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/food101.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/gtsrb.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/kmnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/mango-leaf-disease.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/mnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/oxford-iiit-pet.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/oxford_flowers102.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/pcam.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/rendered-sst2.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/resisc45.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/stanford-cars.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/stl10.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/sun397.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/svhn.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/the_eight_tasks.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/test/tiny-imagenet.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/TALL14.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/TALL20.yaml +28 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/cifar10.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/cifar100.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/cub-200-2011.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/dtd.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/emnist_letters.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/emnist_mnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/eurosat.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/fer2013.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/food101.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/gtsrb.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/kmnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/mango-leaf-disease.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/mnist.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/oxford-iiit-pet.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/oxford_flowers102.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/pcam.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/rendered-sst2.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/resisc45.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/stanford-cars.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/stl10.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/sun397.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/svhn.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/the_eight_tasks.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/train/tiny-imagenet.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/dtd.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/eurosat.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/gtsrb.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/mnist.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/resisc45.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/stanford-cars.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/sun397.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/svhn.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/image_classification/val/the_eight_tasks.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/llm_sft/alpaca_cleaned.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/llm_sft/ultrachat_200k.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/question_answering/search_qa.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/question_answering/test/search_qa.yaml +7 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/question_answering/train/MetaMathQA.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/question_answering/train/search_qa.yaml +7 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/question_answering/val/search_qa.yaml +7 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/summarization/test/xsum.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/summarization/train/xsum.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/summarization/val/xsum.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/summarization/xsum.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/text_generation/test/gsm-hard.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/text_generation/test/gsm8k.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/text_generation/test/gsm8k_question_label.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/text_generation/train/CodeAlpaca-20k.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/text_generation/train/gsm8k.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/dataset/text_generation/train/gsm8k_question_label.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/auto.yaml +16 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/llama_ddp.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/llama_fsdp.yaml +16 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/llama_peft_fsdp.yaml +16 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/loggers/csv_logger.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/strategy/deepspeed.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/strategy/llama_fsdp.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/fabric_model_fusion.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/hydra/default.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/hydra/help/fusion_bench_help.yaml +47 -0
- fusion_bench-0.2.9/fusion_bench_config/hydra/job_logging/rich_logging.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/llama_full_finetune.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/llama_magnitude_pruning.yaml +16 -0
- fusion_bench-0.2.9/fusion_bench_config/llama_model_fusion.yaml +17 -0
- fusion_bench-0.2.9/fusion_bench_config/method/ada_svd/clip_vision.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/method/adamerging/clip.yaml +23 -0
- fusion_bench-0.2.9/fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +23 -0
- fusion_bench-0.2.9/fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +23 -0
- fusion_bench-0.2.9/fusion_bench_config/method/adamerging/llama_sft.yaml +33 -0
- fusion_bench-0.2.9/fusion_bench_config/method/adamerging.yaml +23 -0
- fusion_bench-0.2.9/fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/method/classification/clip_continual_finetune.yaml +28 -0
- fusion_bench-0.2.9/fusion_bench_config/method/classification/clip_finetune.yaml +26 -0
- fusion_bench-0.2.9/fusion_bench_config/method/clip_finetune.yaml +26 -0
- fusion_bench-0.2.9/fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +27 -0
- fusion_bench-0.2.9/fusion_bench_config/method/concrete_subspace/clip_concrete_task_arithmetic.yaml +25 -0
- fusion_bench-0.2.9/fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +27 -0
- fusion_bench-0.2.9/fusion_bench_config/method/dare/simple_average.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/method/dare/task_arithmetic.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/method/dare/ties_merging.yaml +15 -0
- fusion_bench-0.2.9/fusion_bench_config/method/dawe/dawe_for_clip.yaml +32 -0
- fusion_bench-0.2.9/fusion_bench_config/method/depth_upscaling.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/method/dummy.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/method/ensemble/simple_ensemble.yaml +2 -0
- fusion_bench-0.2.9/fusion_bench_config/method/ensemble/weighted_ensemble.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +13 -0
- fusion_bench-0.2.9/fusion_bench_config/method/fisher_merging/fisher_merging.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/expo.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/linear_interpolation.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/llama_expo.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/llama_expo_with_dare.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/simple_average_for_llama.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/weighted_average.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/method/linear/weighted_average_for_llama.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +47 -0
- fusion_bench-0.2.9/fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +47 -0
- fusion_bench-0.2.9/fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +63 -0
- fusion_bench-0.2.9/fusion_bench_config/method/mixtral_moe_merging.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/method/mixtral_moe_upscaling.yaml +7 -0
- fusion_bench-0.2.9/fusion_bench_config/method/model_recombination.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/method/opcm/opcm.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/method/opcm/task_arithmetic.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/method/opcm/ties_merging.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/method/opcm/weight_average.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/method/pruning/llama_magnitude_pruning.yaml +14 -0
- fusion_bench-0.2.9/fusion_bench_config/method/pruning/llama_random_pruning.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/method/pruning/llama_wanda_pruning.yaml +16 -0
- fusion_bench-0.2.9/fusion_bench_config/method/pruning/magnitude_diff_pruning.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/method/pwe_moe_ls_for_clip.yaml +22 -0
- fusion_bench-0.2.9/fusion_bench_config/method/rankone_moe/rankone_moe.yaml +26 -0
- fusion_bench-0.2.9/fusion_bench_config/method/regmean/clip_regmean.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/method/regmean/gpt2_regmean.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/method/regmean/regmean.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/method/simple_average.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/method/slerp/slerp.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml +14 -0
- fusion_bench-0.2.9/fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/method/surgery/adamerging_surgery.yaml +27 -0
- fusion_bench-0.2.9/fusion_bench_config/method/task_arithmetic.yaml +2 -0
- fusion_bench-0.2.9/fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +2 -0
- fusion_bench-0.2.9/fusion_bench_config/method/ties_merging.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +7 -0
- fusion_bench-0.2.9/fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +39 -0
- fusion_bench-0.2.9/fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/README.md +38 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml +22 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_TALL20.yaml +29 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar100.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_dtd.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_eight_tasks.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_emnist_letters.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_fashion_mnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_fer2013.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_food101.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_kmnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_mnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_oxford-iiit-pet.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_oxford_flowers102.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_pcam.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_rendered-sst2.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_stl10.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_sun397.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_svhn.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL14.yaml +22 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL20.yaml +29 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_cifar10.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_cifar100.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_dtd.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_emnist_letters.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_fashion_mnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_fer2013.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_food101.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_kmnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_mnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_oxford-iiit-pet.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_oxford_flowers102.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_pcam.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_rendered-sst2.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_stl10.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_sun397.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_svhn.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_TALL14.yaml +22 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_TALL20.yaml +29 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_cifar10.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_cifar100.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_dtd.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_eight_tasks.yaml +10 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_emnist_letters.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_fashion_mnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_fer2013.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_food101.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_kmnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_mnist.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/download_TALL20_models.sh +6 -0
- fusion_bench-0.2.9/fusion_bench_config/model/clip-vit/generate_vit_model_config.sh +23 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-cola.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-cola_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mnli.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mnli_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mrpc.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mrpc_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qnli.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qnli_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qqp.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qqp_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-rte.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-rte_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-sst2.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-sst2_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-stsb.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-base_glue-stsb_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-cola_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-mnli_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-mrpc_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-qnli_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-qqp_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-rte_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-sst2_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/flan-t5-large_glue-stsb_lora-16.yaml +4 -0
- fusion_bench-0.2.9/fusion_bench_config/model/flan-t5/generate_flan-t5.sh +38 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +53 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_model_only.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL14_model_only.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +14 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +24 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_generalization_exp1.yaml +24 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_generalization_exp2.yaml +24 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +13 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_mtl.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_clean.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted.yaml +29 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +5 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +15 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_model_only.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +21 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +21 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +20 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +21 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +17 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/Seq2SeqLMPool/_template.yaml +8 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +13 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +41 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +68 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +7 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +45 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +23 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +14 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/automodelpool.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/gpt-2_glue.yaml +64 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/mixtral_moe_merging.yaml +14 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +6 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/nyuv2_modelpool.yaml +26 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/smile_mistral_exp_v1.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/smile_mistral_exp_v2.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/smile_mistral_exp_v3.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/modelpool/smile_mistral_exp_v4.yaml +13 -0
- fusion_bench-0.2.9/fusion_bench_config/nyuv2_config.yaml +17 -0
- fusion_bench-0.2.9/fusion_bench_config/nyuv2_mtl_train.yaml +32 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +31 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted.yaml +27 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml +11 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +31 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_L14.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_val.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml +12 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL14.yaml +19 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TALL20.yaml +26 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar10.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_cifar100.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_dtd.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_emnist_letters.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_eurosat.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fashion_mnist.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_fer2013.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_food101.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_gtsrb.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_kmnist.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_mnist.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford-iiit-pet.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_oxford_flowers102_val.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_pcam.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_rendered-sst2.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_resisc45.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stanford-cars.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_stl10.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml +3 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +18 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_clean.yaml +24 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_corrupted.yaml +27 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +22 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/dummy.yaml +2 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/flan-t5_glue_text_generation.yaml +44 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/gpt-2_glue.yaml +39 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/nyuv2_taskpool.yaml +9 -0
- fusion_bench-0.2.9/fusion_bench_config/taskpool/reward_model_evaluation.yaml +18 -0
- fusion_bench-0.2.9/pyproject.toml +47 -0
- fusion_bench-0.2.9/setup.cfg +4 -0
- fusion_bench-0.2.9/tests/test_depth_upscaling.py +80 -0
- fusion_bench-0.2.9/tests/test_simple_average.py +47 -0
- fusion_bench-0.2.9/tests/test_weighed_ensemble.py +30 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Anke Tang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: fusion_bench
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: A Comprehensive Benchmark of Deep Model Fusion
|
|
5
|
+
Author-email: Anke Tang <tang.anke@foxmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Anke Tang
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Repository, https://github.com/tanganke/fusion_bench
|
|
29
|
+
Project-URL: Homepage, https://github.com/tanganke/fusion_bench
|
|
30
|
+
Project-URL: Issues, https://github.com/tanganke/fusion_bench/issues
|
|
31
|
+
Keywords: deep learning,model fusion,benchmark
|
|
32
|
+
Requires-Python: >=3.10
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
License-File: LICENSE
|
|
35
|
+
Requires-Dist: hydra-core
|
|
36
|
+
Requires-Dist: lightning
|
|
37
|
+
Requires-Dist: transformers
|
|
38
|
+
Requires-Dist: datasets
|
|
39
|
+
Requires-Dist: peft
|
|
40
|
+
Requires-Dist: huggingface_hub
|
|
41
|
+
Requires-Dist: matplotlib
|
|
42
|
+
Requires-Dist: tensorboard
|
|
43
|
+
Requires-Dist: tqdm
|
|
44
|
+
Requires-Dist: rich
|
|
45
|
+
Requires-Dist: scipy
|
|
46
|
+
Requires-Dist: h5py
|
|
47
|
+
Requires-Dist: pytest
|
|
48
|
+
|
|
49
|
+
<div align='center'>
|
|
50
|
+
|
|
51
|
+
# FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
|
|
52
|
+
|
|
53
|
+
[](http://arxiv.org/abs/2406.03280)
|
|
54
|
+
[](https://github.com/tanganke/fusion_bench/blob/main/LICENSE)
|
|
55
|
+
[](https://pypi.org/project/fusion-bench/)
|
|
56
|
+
[](https://pepy.tech/project/fusion-bench)
|
|
57
|
+
[](https://tanganke.github.io/fusion_bench/)
|
|
58
|
+
[](https://github.com/psf/black)
|
|
59
|
+
[](https://github.com/google/yamlfmt)
|
|
60
|
+
|
|
61
|
+
</div>
|
|
62
|
+
|
|
63
|
+
> [!TIP]
|
|
64
|
+
> Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
|
|
65
|
+
|
|
66
|
+
## Overview
|
|
67
|
+
|
|
68
|
+
FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
|
|
69
|
+
|
|
70
|
+
Projects based on FusionBench and news from the community (descending order of date):
|
|
71
|
+
|
|
72
|
+
<details>
|
|
73
|
+
<summary>Anke Tang, et al. Merging Models on the Fly Without Retraining: A Sequential Approach to Scalable Continual Model Merging. Jan 2025. https://arxiv.org/pdf/2501.09522</summary>
|
|
74
|
+
|
|
75
|
+
Deep model merging represents an emerging research direction that combines multiple fine-tuned models to harness their specialized capabilities across different tasks and domains. Current model merging techniques focus on merging all available models simultaneously, with weight interpolation-based methods being the predominant approaches. However, these conventional approaches are not well-suited for scenarios where models become available sequentially, and they often suffer from high memory requirements and potential interference between tasks. In this study, we propose a training-free projection-based continual merging method that processes models sequentially through orthogonal projections of weight matrices and adaptive scaling mechanisms. Our method operates by projecting new parameter updates onto subspaces orthogonal to existing merged parameter updates while using an adaptive scaling mechanism to maintain stable parameter distances, enabling efficient sequential integration of task-specific knowledge. Our approach maintains constant memory complexity to the number of models, minimizes interference between tasks through orthogonal projections, and retains the performance of previously merged models through adaptive task vector scaling. Extensive experiments on CLIP-ViT models demonstrate that our method achieves a 5-8% average accuracy improvement while maintaining robust performance in different task orderings.
|
|
76
|
+
</details>
|
|
77
|
+
|
|
78
|
+
<details>
|
|
79
|
+
<summary>Yongxian Wei, et al. Modeling Multi-Task Model Merging as Adaptive Projective Gradient Descent. Jan 2025. https://arxiv.org/abs/2501.01230</summary>
|
|
80
|
+
|
|
81
|
+
Merging multiple expert models offers a promising approach for performing multi-task learning without accessing their original data. Existing methods attempt to alleviate task conflicts by sparsifying task vectors or promoting orthogonality among them. However, they overlook the fundamental requirement of model merging: ensuring the merged model performs comparably to task-specific models on respective tasks. We find these methods inevitably discard task-specific information that, while causing conflicts, is crucial for performance. Based on our findings, we frame model merging as a constrained optimization problem (i.e., minimizing the gap between the merged model and individual models, subject to the constraint of retaining shared knowledge) and solve it via adaptive projective gradient descent. Specifically, we align the merged model with individual models by decomposing and reconstituting the loss function, alleviating conflicts through data-free optimization of task vectors. To retain shared knowledge, we optimize this objective by projecting gradients within a shared subspace spanning all tasks. Moreover, we view merging coefficients as adaptive learning rates and propose a task-aware, training-free strategy. Experiments show that our plug-and-play approach consistently outperforms previous methods, achieving state-of-the-art results across diverse architectures and tasks in both vision and NLP domains. Our code is available here.
|
|
82
|
+
</details>
|
|
83
|
+
|
|
84
|
+
<details>
|
|
85
|
+
<summary>Hongling Zheng, Li Shen, Anke Tang, Yong Luo et al. Learn From Model Beyond Fine-Tuning: A Survey. Nature Machine Intelligence. Jan, 2025. https://www.nature.com/articles/s42256-024-00961-0</summary>
|
|
86
|
+
|
|
87
|
+
> Foundation models (FM) have demonstrated remarkable performance across a wide range of tasks (especially in the fields of natural language processing and computer vision), primarily attributed to their ability to comprehend instructions and access extensive, high-quality data. This not only showcases their current effectiveness but also sets a promising trajectory towards the development of artificial general intelligence. Unfortunately, due to multiple constraints, the raw data of the model used for large model training are often inaccessible, so the use of end-to-end models for downstream tasks has become a new research trend, which we call Learn From Model (LFM) in this article. LFM focuses on the research, modification, and design of FM based on the model interface, so as to better understand the model structure and weights (in a black box environment), and to generalize the model to downstream tasks. The study of LFM techniques can be broadly categorized into five major areas: model tuning, model distillation, model reuse, meta learning and model editing. Each category encompasses a repertoire of methods and strategies that aim to enhance the capabilities and performance of FM. This paper gives a comprehensive review of the current methods based on FM from the perspective of LFM, in order to help readers better understand the current research status and ideas. To conclude, we summarize the survey by highlighting several critical areas for future exploration and addressing open issues that require further attention from the research community. The relevant papers we investigated in this article can be accessed at https://github.com/ruthless-man/Awesome-Learn-from-Model.
|
|
88
|
+
</details>
|
|
89
|
+
|
|
90
|
+
<details>
|
|
91
|
+
<summary>Li Shen, Anke Tang, Enneng Yang et al. Efficient and Effective Weight-Ensembling Mixture of Experts for Multi-Task Model Merging. Oct, 2024. https://github.com/EnnengYang/Efficient-WEMoE</summary>
|
|
92
|
+
|
|
93
|
+
<img width="1018" alt="image" src="https://github.com/user-attachments/assets/b7e1279e-87fc-4016-8867-1bff7700e271">
|
|
94
|
+
|
|
95
|
+
</details>
|
|
96
|
+
<details>
|
|
97
|
+
<summary>Jinluan Yang et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. Oct, 2024. http://arxiv.org/abs/2410.13910</summary>
|
|
98
|
+
|
|
99
|
+
<img width="1018" alt="image" src="https://github.com/user-attachments/assets/679aaa7e-0506-4e09-a12a-345c12cf529f">
|
|
100
|
+
|
|
101
|
+
</details>
|
|
102
|
+
<details>
|
|
103
|
+
<summary>Anke Tang et al. SMILE: Zero-Shot Sparse Mixture of Low-Rank Experts Construction From Pre-Trained Foundation Models. Aug, 2024. http://arxiv.org/abs/2408.10174</summary>
|
|
104
|
+
|
|
105
|
+
Example notebooks can be found at [examples/smile_upscaling](examples/smile_upscaling).
|
|
106
|
+

|
|
107
|
+
|
|
108
|
+
</details>
|
|
109
|
+
|
|
110
|
+
## Installation
|
|
111
|
+
|
|
112
|
+
install from PyPI:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install fusion-bench
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
or install the latest version in development from github repository
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
git clone https://github.com/tanganke/fusion_bench.git
|
|
122
|
+
cd fusion_bench
|
|
123
|
+
|
|
124
|
+
pip install -e . # install the package in editable mode
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Introduction to Deep Model Fusion
|
|
128
|
+
|
|
129
|
+
Deep model fusion is a technique that merges, ensemble, or fuse multiple deep neural networks to obtain a unified model.
|
|
130
|
+
It can be used to improve the performance and robustness of model or to combine the strengths of different models, such as fuse multiple task-specific models to create a multi-task model.
|
|
131
|
+
For a more detailed introduction to deep model fusion, you can refer to [W. Li, 2023, 'Deep Model Fusion: A Survey'](https://arxiv.org/abs/2309.15698). We also provide a brief overview of deep model fusion in [our documentation](https://tanganke.github.io/fusion_bench/).
|
|
132
|
+
In this benchmark, we evaluate the performance of different fusion methods on a variety of datasets and tasks.
|
|
133
|
+
|
|
134
|
+
## Project Structure
|
|
135
|
+
|
|
136
|
+
The project is structured as follows:
|
|
137
|
+
|
|
138
|
+
- `fusion_bench/`: the main package of the benchmark.
|
|
139
|
+
- `method`: contains the implementation of the fusion methods.
|
|
140
|
+
> **naming convention**: `fusion_bench/method/{method_name}/{variant}.py` contains the implementation of the specific method or its variants.
|
|
141
|
+
For example, `fusion_bench/method/regmean/clip_regmean.py` contains the implementation of the RegMean algorithm for CLIP vision models.
|
|
142
|
+
- `modelpool`: contains the implementation of the model pool, responsible for managing the models and dataset to be loaded.
|
|
143
|
+
- `taskpool`: contains the implementation of the task pool, responsible for evaluating the performance of models returned by the algorithm.
|
|
144
|
+
- `config/`: configuration files for the benchmark. We use [Hydra](https://hydra.cc/) to manage the configurations.
|
|
145
|
+
- `method`: configuration files for the fusion methods.
|
|
146
|
+
> **naming convention**: `config/method/{method_name}/{variant}.yaml` contains the configuration for the specific method or its variants.
|
|
147
|
+
- `modelpool`: configuration files for the model pool.
|
|
148
|
+
- `taskpool`: configuration files for the task pool.
|
|
149
|
+
- `model`: configuration files for the models.
|
|
150
|
+
- `dataset`: configuration files for the datasets.
|
|
151
|
+
- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -r mkdocs-requirements.txt`.
|
|
152
|
+
- `examples/`: example scripts for running some of the experiments.
|
|
153
|
+
> **naming convention**: `examples/{method_name}/` contains the files such as bash scripts and jupyter notebooks for the specific method.
|
|
154
|
+
- `tests/`: unit tests for the benchmark.
|
|
155
|
+
|
|
156
|
+
## A Unified Command Line Interface
|
|
157
|
+
|
|
158
|
+
The `fusion_bench` command-line interface is a powerful tool for researchers and practitioners in the field of model fusion. It provides a streamlined way to experiment with various fusion algorithms, model combinations, and evaluation tasks.
|
|
159
|
+
By leveraging Hydra's configuration management, fusion_bench offers flexibility in setting up experiments and reproducibility in results.
|
|
160
|
+
The CLI's design allows for easy extension to new fusion methods, model types, and tasks, making it a versatile platform for advancing research in model fusion techniques.
|
|
161
|
+
|
|
162
|
+
Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_bench/) for more information.
|
|
163
|
+
|
|
164
|
+
## Implement your own model fusion algorithm
|
|
165
|
+
|
|
166
|
+
First, create a new Python file for the algorithm in the `fusion_bench/method` directory.
|
|
167
|
+
Following the naming convention, the file should be named `{method_name_or_class}/{variant}.py`.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from fusion_bench import BaseModelFusionAlgorithm, BaseModelPool
|
|
171
|
+
|
|
172
|
+
class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
|
|
173
|
+
"""
|
|
174
|
+
An example of a derived model fusion algorithm.
|
|
175
|
+
"""
|
|
176
|
+
|
|
177
|
+
# _config_mapping maps the attribution to the corresponding key in the configuration file.
|
|
178
|
+
# this is optional and can be used to serialize the object to a configuration file.
|
|
179
|
+
# `self.config.hyperparam_1` will be mapped to the attribute `hyperparam_attr_1`.
|
|
180
|
+
_config_mapping = BaseModelFusionAlgorithm._config_mapping | {
|
|
181
|
+
"hyperparam_attr_1": "hyperparam_1",
|
|
182
|
+
"hyperparam_attr_2": "hyperparam_2",
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
def __init__(self, hyperparam_1, hyperparam_2, **kwargs):
|
|
186
|
+
self.hyperparam_attr_1 = hyperparam_1
|
|
187
|
+
self.hyperparam_attr_2 = hyperparam_2
|
|
188
|
+
super().__init__(**kwargs)
|
|
189
|
+
|
|
190
|
+
def run(self, modelpool: BaseModelPool):
|
|
191
|
+
# modelpool is an object that is responsible for managing the models and dataset to be loaded.
|
|
192
|
+
# implement the fusion algorithm here.
|
|
193
|
+
raise NotImplementedError(
|
|
194
|
+
"DerivedModelFusionAlgorithm.run() is not implemented."
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
A corresponding configuration file should be created to specify the class and hyperparameters of the algorithm.
|
|
199
|
+
Here we assume the configuration file is placed at `config/method/your_algorithm_config.yaml`.
|
|
200
|
+
|
|
201
|
+
> [!NOTE]
|
|
202
|
+
> In fact, you can place your implementation anywhere you like, as long as the `_target_` in the configuration file points to the correct class.
|
|
203
|
+
|
|
204
|
+
```yaml
|
|
205
|
+
_target_: path_to_the_module.DerivedModelFusionAlgorithm
|
|
206
|
+
|
|
207
|
+
hyperparam_1: some_value
|
|
208
|
+
hyperparam_2: another_value
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Use the algorithm in the FusionBench:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
fusion_bench \
|
|
215
|
+
method=your_algorithm_config \
|
|
216
|
+
method.hyperparam_1=you_can_override_this \
|
|
217
|
+
method.hyperparam_2=and_this \
|
|
218
|
+
... # other configurations
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
### :rocket: Quick Start for Experienced Users
|
|
222
|
+
|
|
223
|
+
We provide a project template for quickly starting a new fusion algorithm implementation here: [FusionBench Project Template](https://github.com/fusion-bench/fusion-bench-project-template).
|
|
224
|
+
|
|
225
|
+
<div align='center'>
|
|
226
|
+
|
|
227
|
+
Click on [<kbd>Use this template</kbd>](https://github.com/fusion-bench/fusion-bench-project-template/generate) to initialize new repository.
|
|
228
|
+
|
|
229
|
+
</div>
|
|
230
|
+
|
|
231
|
+
### FusionBench Command Generator WebUI (for v0.1.x)
|
|
232
|
+
|
|
233
|
+
FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
|
|
234
|
+
It provides an interactive way to select and customize FusionBench configurations, making it easier to run experiments with different settings.
|
|
235
|
+
[Read more here](https://tanganke.github.io/fusion_bench/cli/fusion_bench_webui/).
|
|
236
|
+
|
|
237
|
+

|
|
238
|
+
|
|
239
|
+
## Citation
|
|
240
|
+
|
|
241
|
+
If you find this benchmark useful, please consider citing our work:
|
|
242
|
+
|
|
243
|
+
```bibtex
|
|
244
|
+
@misc{tangFusionBenchComprehensiveBenchmark2024,
|
|
245
|
+
title = {{{FusionBench}}: {{A Comprehensive Benchmark}} of {{Deep Model Fusion}}},
|
|
246
|
+
shorttitle = {{{FusionBench}}},
|
|
247
|
+
author = {Tang, Anke and Shen, Li and Luo, Yong and Hu, Han and Du, Bo and Tao, Dacheng},
|
|
248
|
+
year = {2024},
|
|
249
|
+
month = jun,
|
|
250
|
+
number = {arXiv:2406.03280},
|
|
251
|
+
eprint = {2406.03280},
|
|
252
|
+
publisher = {arXiv},
|
|
253
|
+
url = {http://arxiv.org/abs/2406.03280},
|
|
254
|
+
archiveprefix = {arxiv},
|
|
255
|
+
langid = {english},
|
|
256
|
+
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning}
|
|
257
|
+
}
|
|
258
|
+
```
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
<div align='center'>
|
|
2
|
+
|
|
3
|
+
# FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
|
|
4
|
+
|
|
5
|
+
[](http://arxiv.org/abs/2406.03280)
|
|
6
|
+
[](https://github.com/tanganke/fusion_bench/blob/main/LICENSE)
|
|
7
|
+
[](https://pypi.org/project/fusion-bench/)
|
|
8
|
+
[](https://pepy.tech/project/fusion-bench)
|
|
9
|
+
[](https://tanganke.github.io/fusion_bench/)
|
|
10
|
+
[](https://github.com/psf/black)
|
|
11
|
+
[](https://github.com/google/yamlfmt)
|
|
12
|
+
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
> [!TIP]
|
|
16
|
+
> Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
|
|
21
|
+
|
|
22
|
+
Projects based on FusionBench and news from the community (descending order of date):
|
|
23
|
+
|
|
24
|
+
<details>
|
|
25
|
+
<summary>Anke Tang, et al. Merging Models on the Fly Without Retraining: A Sequential Approach to Scalable Continual Model Merging. Jan 2025. https://arxiv.org/pdf/2501.09522</summary>
|
|
26
|
+
|
|
27
|
+
Deep model merging represents an emerging research direction that combines multiple fine-tuned models to harness their specialized capabilities across different tasks and domains. Current model merging techniques focus on merging all available models simultaneously, with weight interpolation-based methods being the predominant approaches. However, these conventional approaches are not well-suited for scenarios where models become available sequentially, and they often suffer from high memory requirements and potential interference between tasks. In this study, we propose a training-free projection-based continual merging method that processes models sequentially through orthogonal projections of weight matrices and adaptive scaling mechanisms. Our method operates by projecting new parameter updates onto subspaces orthogonal to existing merged parameter updates while using an adaptive scaling mechanism to maintain stable parameter distances, enabling efficient sequential integration of task-specific knowledge. Our approach maintains constant memory complexity to the number of models, minimizes interference between tasks through orthogonal projections, and retains the performance of previously merged models through adaptive task vector scaling. Extensive experiments on CLIP-ViT models demonstrate that our method achieves a 5-8% average accuracy improvement while maintaining robust performance in different task orderings.
|
|
28
|
+
</details>
|
|
29
|
+
|
|
30
|
+
<details>
|
|
31
|
+
<summary>Yongxian Wei, et al. Modeling Multi-Task Model Merging as Adaptive Projective Gradient Descent. Jan 2025. https://arxiv.org/abs/2501.01230</summary>
|
|
32
|
+
|
|
33
|
+
Merging multiple expert models offers a promising approach for performing multi-task learning without accessing their original data. Existing methods attempt to alleviate task conflicts by sparsifying task vectors or promoting orthogonality among them. However, they overlook the fundamental requirement of model merging: ensuring the merged model performs comparably to task-specific models on respective tasks. We find these methods inevitably discard task-specific information that, while causing conflicts, is crucial for performance. Based on our findings, we frame model merging as a constrained optimization problem (i.e., minimizing the gap between the merged model and individual models, subject to the constraint of retaining shared knowledge) and solve it via adaptive projective gradient descent. Specifically, we align the merged model with individual models by decomposing and reconstituting the loss function, alleviating conflicts through data-free optimization of task vectors. To retain shared knowledge, we optimize this objective by projecting gradients within a shared subspace spanning all tasks. Moreover, we view merging coefficients as adaptive learning rates and propose a task-aware, training-free strategy. Experiments show that our plug-and-play approach consistently outperforms previous methods, achieving state-of-the-art results across diverse architectures and tasks in both vision and NLP domains. Our code is available here.
|
|
34
|
+
</details>
|
|
35
|
+
|
|
36
|
+
<details>
|
|
37
|
+
<summary>Hongling Zheng, Li Shen, Anke Tang, Yong Luo et al. Learn From Model Beyond Fine-Tuning: A Survey. Nature Machine Intelligence. Jan, 2025. https://www.nature.com/articles/s42256-024-00961-0</summary>
|
|
38
|
+
|
|
39
|
+
> Foundation models (FM) have demonstrated remarkable performance across a wide range of tasks (especially in the fields of natural language processing and computer vision), primarily attributed to their ability to comprehend instructions and access extensive, high-quality data. This not only showcases their current effectiveness but also sets a promising trajectory towards the development of artificial general intelligence. Unfortunately, due to multiple constraints, the raw data of the model used for large model training are often inaccessible, so the use of end-to-end models for downstream tasks has become a new research trend, which we call Learn From Model (LFM) in this article. LFM focuses on the research, modification, and design of FM based on the model interface, so as to better understand the model structure and weights (in a black box environment), and to generalize the model to downstream tasks. The study of LFM techniques can be broadly categorized into five major areas: model tuning, model distillation, model reuse, meta learning and model editing. Each category encompasses a repertoire of methods and strategies that aim to enhance the capabilities and performance of FM. This paper gives a comprehensive review of the current methods based on FM from the perspective of LFM, in order to help readers better understand the current research status and ideas. To conclude, we summarize the survey by highlighting several critical areas for future exploration and addressing open issues that require further attention from the research community. The relevant papers we investigated in this article can be accessed at https://github.com/ruthless-man/Awesome-Learn-from-Model.
|
|
40
|
+
</details>
|
|
41
|
+
|
|
42
|
+
<details>
|
|
43
|
+
<summary>Li Shen, Anke Tang, Enneng Yang et al. Efficient and Effective Weight-Ensembling Mixture of Experts for Multi-Task Model Merging. Oct, 2024. https://github.com/EnnengYang/Efficient-WEMoE</summary>
|
|
44
|
+
|
|
45
|
+
<img width="1018" alt="image" src="https://github.com/user-attachments/assets/b7e1279e-87fc-4016-8867-1bff7700e271">
|
|
46
|
+
|
|
47
|
+
</details>
|
|
48
|
+
<details>
|
|
49
|
+
<summary>Jinluan Yang et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. Oct, 2024. http://arxiv.org/abs/2410.13910</summary>
|
|
50
|
+
|
|
51
|
+
<img width="1018" alt="image" src="https://github.com/user-attachments/assets/679aaa7e-0506-4e09-a12a-345c12cf529f">
|
|
52
|
+
|
|
53
|
+
</details>
|
|
54
|
+
<details>
|
|
55
|
+
<summary>Anke Tang et al. SMILE: Zero-Shot Sparse Mixture of Low-Rank Experts Construction From Pre-Trained Foundation Models. Aug, 2024. http://arxiv.org/abs/2408.10174</summary>
|
|
56
|
+
|
|
57
|
+
Example notebooks can be found at [examples/smile_upscaling](examples/smile_upscaling).
|
|
58
|
+

|
|
59
|
+
|
|
60
|
+
</details>
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
install from PyPI:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install fusion-bench
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
or install the latest version in development from github repository
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
git clone https://github.com/tanganke/fusion_bench.git
|
|
74
|
+
cd fusion_bench
|
|
75
|
+
|
|
76
|
+
pip install -e . # install the package in editable mode
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Introduction to Deep Model Fusion
|
|
80
|
+
|
|
81
|
+
Deep model fusion is a technique that merges, ensembles, or fuses multiple deep neural networks to obtain a unified model.
|
|
82
|
+
It can be used to improve the performance and robustness of a model or to combine the strengths of different models, such as fusing multiple task-specific models to create a multi-task model.
|
|
83
|
+
For a more detailed introduction to deep model fusion, you can refer to [W. Li, 2023, 'Deep Model Fusion: A Survey'](https://arxiv.org/abs/2309.15698). We also provide a brief overview of deep model fusion in [our documentation](https://tanganke.github.io/fusion_bench/).
|
|
84
|
+
In this benchmark, we evaluate the performance of different fusion methods on a variety of datasets and tasks.
|
|
85
|
+
|
|
86
|
+
## Project Structure
|
|
87
|
+
|
|
88
|
+
The project is structured as follows:
|
|
89
|
+
|
|
90
|
+
- `fusion_bench/`: the main package of the benchmark.
|
|
91
|
+
- `method`: contains the implementation of the fusion methods.
|
|
92
|
+
> **naming convention**: `fusion_bench/method/{method_name}/{variant}.py` contains the implementation of the specific method or its variants.
|
|
93
|
+
For example, `fusion_bench/method/regmean/clip_regmean.py` contains the implementation of the RegMean algorithm for CLIP vision models.
|
|
94
|
+
- `modelpool`: contains the implementation of the model pool, responsible for managing the models and dataset to be loaded.
|
|
95
|
+
- `taskpool`: contains the implementation of the task pool, responsible for evaluating the performance of models returned by the algorithm.
|
|
96
|
+
- `config/`: configuration files for the benchmark. We use [Hydra](https://hydra.cc/) to manage the configurations.
|
|
97
|
+
- `method`: configuration files for the fusion methods.
|
|
98
|
+
> **naming convention**: `config/method/{method_name}/{variant}.yaml` contains the configuration for the specific method or its variants.
|
|
99
|
+
- `modelpool`: configuration files for the model pool.
|
|
100
|
+
- `taskpool`: configuration files for the task pool.
|
|
101
|
+
- `model`: configuration files for the models.
|
|
102
|
+
- `dataset`: configuration files for the datasets.
|
|
103
|
+
- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -r mkdocs-requirements.txt`.
|
|
104
|
+
- `examples/`: example scripts for running some of the experiments.
|
|
105
|
+
> **naming convention**: `examples/{method_name}/` contains the files such as bash scripts and jupyter notebooks for the specific method.
|
|
106
|
+
- `tests/`: unit tests for the benchmark.
|
|
107
|
+
|
|
108
|
+
## A Unified Command Line Interface
|
|
109
|
+
|
|
110
|
+
The `fusion_bench` command-line interface is a powerful tool for researchers and practitioners in the field of model fusion. It provides a streamlined way to experiment with various fusion algorithms, model combinations, and evaluation tasks.
|
|
111
|
+
By leveraging Hydra's configuration management, fusion_bench offers flexibility in setting up experiments and reproducibility in results.
|
|
112
|
+
The CLI's design allows for easy extension to new fusion methods, model types, and tasks, making it a versatile platform for advancing research in model fusion techniques.
|
|
113
|
+
|
|
114
|
+
Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_bench/) for more information.
|
|
115
|
+
|
|
116
|
+
## Implement your own model fusion algorithm
|
|
117
|
+
|
|
118
|
+
First, create a new Python file for the algorithm in the `fusion_bench/method` directory.
|
|
119
|
+
Following the naming convention, the file should be named `{method_name_or_class}/{variant}.py`.
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from fusion_bench import BaseModelFusionAlgorithm, BaseModelPool
|
|
123
|
+
|
|
124
|
+
class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
|
|
125
|
+
"""
|
|
126
|
+
An example of a derived model fusion algorithm.
|
|
127
|
+
"""
|
|
128
|
+
|
|
129
|
+
# _config_mapping maps the attribution to the corresponding key in the configuration file.
|
|
130
|
+
# this is optional and can be used to serialize the object to a configuration file.
|
|
131
|
+
# `self.config.hyperparam_1` will be mapped to the attribute `hyperparam_attr_1`.
|
|
132
|
+
_config_mapping = BaseModelFusionAlgorithm._config_mapping | {
|
|
133
|
+
"hyperparam_attr_1": "hyperparam_1",
|
|
134
|
+
"hyperparam_attr_2": "hyperparam_2",
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
def __init__(self, hyperparam_1, hyperparam_2, **kwargs):
|
|
138
|
+
self.hyperparam_attr_1 = hyperparam_1
|
|
139
|
+
self.hyperparam_attr_2 = hyperparam_2
|
|
140
|
+
super().__init__(**kwargs)
|
|
141
|
+
|
|
142
|
+
def run(self, modelpool: BaseModelPool):
|
|
143
|
+
# modelpool is an object that is responsible for managing the models and datasets to be loaded.
|
|
144
|
+
# implement the fusion algorithm here.
|
|
145
|
+
raise NotImplementedError(
|
|
146
|
+
"DerivedModelFusionAlgorithm.run() is not implemented."
|
|
147
|
+
)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
A corresponding configuration file should be created to specify the class and hyperparameters of the algorithm.
|
|
151
|
+
Here we assume the configuration file is placed at `config/method/your_algorithm_config.yaml`.
|
|
152
|
+
|
|
153
|
+
> [!NOTE]
|
|
154
|
+
> In fact, you can place your implementation anywhere you like, as long as the `_target_` in the configuration file points to the correct class.
|
|
155
|
+
|
|
156
|
+
```yaml
|
|
157
|
+
_target_: path_to_the_module.DerivedModelFusionAlgorithm
|
|
158
|
+
|
|
159
|
+
hyperparam_1: some_value
|
|
160
|
+
hyperparam_2: another_value
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
Use the algorithm in FusionBench:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
fusion_bench \
|
|
167
|
+
method=your_algorithm_config \
|
|
168
|
+
method.hyperparam_1=you_can_override_this \
|
|
169
|
+
method.hyperparam_2=and_this \
|
|
170
|
+
... # other configurations
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### :rocket: Quick Start for Experienced Users
|
|
174
|
+
|
|
175
|
+
We provide a project template for quickly starting a new fusion algorithm implementation here: [FusionBench Project Template](https://github.com/fusion-bench/fusion-bench-project-template).
|
|
176
|
+
|
|
177
|
+
<div align='center'>
|
|
178
|
+
|
|
179
|
+
Click on [<kbd>Use this template</kbd>](https://github.com/fusion-bench/fusion-bench-project-template/generate) to initialize a new repository.
|
|
180
|
+
|
|
181
|
+
</div>
|
|
182
|
+
|
|
183
|
+
### FusionBench Command Generator WebUI (for v0.1.x)
|
|
184
|
+
|
|
185
|
+
FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
|
|
186
|
+
It provides an interactive way to select and customize FusionBench configurations, making it easier to run experiments with different settings.
|
|
187
|
+
[Read more here](https://tanganke.github.io/fusion_bench/cli/fusion_bench_webui/).
|
|
188
|
+
|
|
189
|
+

|
|
190
|
+
|
|
191
|
+
## Citation
|
|
192
|
+
|
|
193
|
+
If you find this benchmark useful, please consider citing our work:
|
|
194
|
+
|
|
195
|
+
```bibtex
|
|
196
|
+
@misc{tangFusionBenchComprehensiveBenchmark2024,
|
|
197
|
+
title = {{{FusionBench}}: {{A Comprehensive Benchmark}} of {{Deep Model Fusion}}},
|
|
198
|
+
shorttitle = {{{FusionBench}}},
|
|
199
|
+
author = {Tang, Anke and Shen, Li and Luo, Yong and Hu, Han and Du, Bo and Tao, Dacheng},
|
|
200
|
+
year = {2024},
|
|
201
|
+
month = jun,
|
|
202
|
+
number = {arXiv:2406.03280},
|
|
203
|
+
eprint = {2406.03280},
|
|
204
|
+
publisher = {arXiv},
|
|
205
|
+
url = {http://arxiv.org/abs/2406.03280},
|
|
206
|
+
archiveprefix = {arxiv},
|
|
207
|
+
langid = {english},
|
|
208
|
+
keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning}
|
|
209
|
+
}
|
|
210
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# flake8: noqa: F401
|
|
2
|
+
from . import (
|
|
3
|
+
constants,
|
|
4
|
+
dataset,
|
|
5
|
+
method,
|
|
6
|
+
metrics,
|
|
7
|
+
mixins,
|
|
8
|
+
modelpool,
|
|
9
|
+
models,
|
|
10
|
+
optim,
|
|
11
|
+
programs,
|
|
12
|
+
taskpool,
|
|
13
|
+
tasks,
|
|
14
|
+
utils,
|
|
15
|
+
)
|
|
16
|
+
from .method import BaseAlgorithm, BaseModelFusionAlgorithm
|
|
17
|
+
from .modelpool import BaseModelPool
|
|
18
|
+
from .models import separate_io
|
|
19
|
+
from .taskpool import BaseTaskPool
|
|
20
|
+
from .utils import parse_dtype, print_parameters, timeit_context
|
|
File without changes
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
from omegaconf import DictConfig
|
|
4
|
+
|
|
5
|
+
from .base_algorithm import ModelFusionAlgorithm
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AlgorithmFactory:
    """
    Factory class to create and manage different model fusion algorithms.

    This class provides methods to create algorithms based on a given configuration,
    register new algorithms, and list available algorithms.

    Registry entries map an algorithm name either to a class object or to a dotted
    import path; a leading ``.`` in a path is shorthand for the
    ``fusion_bench.method`` package.
    """

    # NOTE(review): the attribute name carries a historical typo ("aglorithms").
    # It is kept as-is for backward compatibility, since external code may
    # reference `AlgorithmFactory._aglorithms` directly.
    _aglorithms = {
        # single task learning (fine-tuning)
        "clip_finetune": ".classification.clip_finetune.ImageClassificationFineTuningForCLIP",
        # analysis
        # model merging methods
        "clip_task_wise_adamerging": ".adamerging.clip_task_wise_adamerging.CLIPTaskWiseAdaMergingAlgorithm",
        "clip_layer_wise_adamerging": ".adamerging.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
        "singular_projection_merging": "fusion_bench.method.smile_upscaling.singular_projection_merging.SingularProjectionMergingAlgorithm",
        "clip_layer_wise_adamerging_surgery": ".surgery.clip_layer_wise_adamerging_surgery.CLIPLayerWiseAdaMergingSurgeryAlgorithm",
        # plug-and-play model merging methods
        "clip_concrete_task_arithmetic": ".concrete_subspace.clip_concrete_task_arithmetic.ConcreteTaskArithmeticAlgorithmForCLIP",
        "clip_concrete_task_wise_adamerging": ".concrete_subspace.clip_concrete_adamerging.ConcreteTaskWiseAdaMergingForCLIP",
        "clip_concrete_layer_wise_adamerging": ".concrete_subspace.clip_concrete_adamerging.ConcreteLayerWiseAdaMergingForCLIP",
        # model mixing methods
        "clip_weight_ensembling_moe": ".we_moe.clip_we_moe.CLIPWeightEnsemblingMoEAlgorithm",
        "sparse_clip_weight_ensembling_moe": "fusion_bench.method.SparseCLIPWeightEnsemblingMoEAlgorithm",
        "smile_mistral_upscaling": ".smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm",
        "rankone_moe": ".rankone_moe.clip_rankone_moe.CLIPRankOneMoEAlgorithm",
    }

    @staticmethod
    def create_algorithm(method_config: DictConfig) -> ModelFusionAlgorithm:
        """
        Create an instance of a model fusion algorithm based on the provided configuration.

        Args:
            method_config (DictConfig): The configuration for the algorithm. Must contain a 'name' attribute that specifies the type of the algorithm.

        Returns:
            ModelFusionAlgorithm: An instance of the specified algorithm.

        Raises:
            ValueError: If 'name' attribute is not found in the configuration or does not match any known algorithm names.
        """
        warnings.warn(
            "AlgorithmFactory.create_algorithm() is deprecated and will be removed in future versions. "
            "Please implement new model fusion algorithm using `fusion_bench.method.BaseModelFusionAlgorithm` instead.",
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller, not this function
        )

        # Imported locally — presumably to avoid a circular import at module
        # load time (fusion_bench.utils <-> compat.method); TODO confirm.
        from fusion_bench.utils import import_object

        algorithm_name = method_config.name
        if algorithm_name not in AlgorithmFactory._aglorithms:
            # Fix: the original message concatenated "...method." directly with
            # "You can register..." (no space) and printed a raw dict_keys repr.
            raise ValueError(
                f"Unknown algorithm: {algorithm_name}, "
                f"available algorithms: {list(AlgorithmFactory._aglorithms.keys())}. "
                "You can register a new algorithm using `AlgorithmFactory.register_algorithm()` method."
            )
        algorithm_cls = AlgorithmFactory._aglorithms[algorithm_name]
        if isinstance(algorithm_cls, str):
            # String entries are dotted import paths; a leading "." is
            # resolved relative to the `fusion_bench.method` package.
            if algorithm_cls.startswith("."):
                algorithm_cls = f"fusion_bench.method.{algorithm_cls[1:]}"
            algorithm_cls = import_object(algorithm_cls)
        return algorithm_cls(method_config)

    @staticmethod
    def register_algorithm(name: str, algorithm_cls):
        """
        Register a new algorithm with the factory.

        Args:
            name (str): The name of the algorithm.
            algorithm_cls: The class of the algorithm to register, or its dotted import path.
        """
        AlgorithmFactory._aglorithms[name] = algorithm_cls

    @classmethod
    def available_algorithms(cls):
        """
        Get a list of available algorithms.

        Returns:
            list: A list of available algorithm names.
        """
        return list(cls._aglorithms.keys())
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def load_algorithm_from_config(method_config: DictConfig):
    """
    Instantiate a model fusion algorithm from a configuration object.

    The configuration's 'name' attribute selects which algorithm to build; the
    lookup and instantiation are delegated to :meth:`AlgorithmFactory.create_algorithm`.

    Args:
        method_config (DictConfig): The configuration for the algorithm. Must contain a 'name' attribute that specifies the type of the algorithm.

    Returns:
        An instance of the specified algorithm.

    Raises:
        ValueError: If 'name' attribute is not found in the configuration or does not match any known algorithm names.
    """
    algorithm = AlgorithmFactory.create_algorithm(method_config)
    return algorithm
|