fusion-bench 0.2.2.tar.gz → 0.2.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/PKG-INFO +25 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/README.md +24 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/__init__.py +1 -1
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/taskpool/clip_image_classification.py +12 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +9 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/gsm8k.py +3 -0
- fusion_bench-0.2.3/fusion_bench/dataset/imdb.py +11 -0
- fusion_bench-0.2.3/fusion_bench/dataset/llama/alpaca.py +142 -0
- fusion_bench-0.2.3/fusion_bench/dataset/llama/openai.py +160 -0
- fusion_bench-0.2.3/fusion_bench/dataset/llama/sharegpt.py +141 -0
- fusion_bench-0.2.3/fusion_bench/dataset/llama/squad.py +125 -0
- fusion_bench-0.2.3/fusion_bench/dataset/llama/wikitext.py +89 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/nyuv2.py +9 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/__init__.py +9 -7
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/ada_svd/clip_vision.py +2 -2
- fusion_bench-0.2.3/fusion_bench/method/adamerging/llama_adamerging.py +330 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/adamerging/task_wise_adamerging.py +13 -1
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/analysis/__init__.py +1 -0
- fusion_bench-0.2.3/fusion_bench/method/analysis/task_vector_cos_similarity.py +172 -0
- fusion_bench-0.2.3/fusion_bench/method/analysis/task_vector_violin_plot.py +205 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/base_algorithm.py +5 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dare/__init__.py +1 -0
- fusion_bench-0.2.3/fusion_bench/method/dare/simple_average.py +48 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dare/task_arithmetic.py +2 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dawe/dawe_for_clip.py +11 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/depth_upscaling/depth_upscaling.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dummy.py +5 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/ensemble.py +32 -5
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/fisher_merging/fisher_merging.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/linear/__init__.py +1 -0
- fusion_bench-0.2.3/fusion_bench/method/linear/expo.py +79 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/linear/linear_interpolation.py +4 -4
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/linear/simple_average_for_llama.py +3 -3
- fusion_bench-0.2.3/fusion_bench/method/lm_finetune/causal_lm_pretrain.py +7 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +5 -5
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/model_recombination.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/llama_magnitude_prune.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/llama_random_prune.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/llama_wanda_prune.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/magnitude_diff_pruning.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/prune_utils.py +10 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pwe_moe/clip_pwe_moe.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/regmean/regmean.py +2 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/simple_average.py +12 -5
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/slerp/slerp.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/smile_upscaling/smile_upscaling.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/sparselo/sparselo.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/task_arithmetic/task_arithmetic.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/ties_merging/ties_merging.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/trust_region/clip_task_arithmetic.py +2 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/weighted_average/llama.py +3 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/weighted_average/weighted_average.py +3 -3
- fusion_bench-0.2.3/fusion_bench/mixins/optim/adamw_with_warmup.py +42 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/base_pool.py +1 -1
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/causal_lm/causal_lm.py +3 -1
- fusion_bench-0.2.3/fusion_bench/models/llama/__init__.py +16 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/model_utils/embedding.py +70 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/model_utils/liger_kernel.py +86 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/model_utils/misc.py +112 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/model_utils/mod.py +52 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/model_utils/visual.py +242 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/patcher.py +80 -0
- fusion_bench-0.2.3/fusion_bench/models/llama/tokenizer_loader.py +157 -0
- fusion_bench-0.2.3/fusion_bench/models/wrappers/ensemble.py +183 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/wrappers/layer_wise_fusion.py +107 -2
- fusion_bench-0.2.3/fusion_bench/optim/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/programs/fabric_fusion_program.py +7 -4
- fusion_bench-0.2.3/fusion_bench/scripts/__init__.py +0 -0
- fusion_bench-0.2.3/fusion_bench/scripts/clip/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/__init__.py +2 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/clip_vision/taskpool.py +15 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/dummy.py +23 -8
- fusion_bench-0.2.3/fusion_bench/taskpool/llama/__init__.py +1 -0
- fusion_bench-0.2.3/fusion_bench/taskpool/llama/test_generation.py +185 -0
- fusion_bench-0.2.3/fusion_bench/tasks/flan_t5_text_generation/__init__.py +0 -0
- fusion_bench-0.2.3/fusion_bench/utils/__init__.py +13 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/cache_utils.py +2 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/data.py +3 -2
- fusion_bench-0.2.3/fusion_bench/utils/devices.py +210 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/dtype.py +27 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/functools.py +12 -1
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/instantiate.py +24 -15
- fusion_bench-0.2.3/fusion_bench/utils/misc.py +18 -0
- fusion_bench-0.2.3/fusion_bench/utils/packages.py +84 -0
- fusion_bench-0.2.3/fusion_bench/utils/parameters.py +258 -0
- fusion_bench-0.2.3/fusion_bench/utils/path.py +7 -0
- fusion_bench-0.2.3/fusion_bench/utils/strenum/__init__.py +325 -0
- fusion_bench-0.2.3/fusion_bench/utils/strenum/_name_mangler.py +127 -0
- fusion_bench-0.2.3/fusion_bench/utils/strenum/_version.py +556 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench.egg-info/PKG-INFO +25 -2
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench.egg-info/SOURCES.txt +39 -1
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench.egg-info/requires.txt +0 -1
- fusion_bench-0.2.3/fusion_bench_config/README.md +12 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/fabric_model_fusion.yaml +2 -0
- fusion_bench-0.2.3/fusion_bench_config/llama_model_fusion.yaml +17 -0
- fusion_bench-0.2.3/fusion_bench_config/method/adamerging/clip.yaml +23 -0
- fusion_bench-0.2.3/fusion_bench_config/method/adamerging/llama_sft.yaml +33 -0
- fusion_bench-0.2.3/fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +6 -0
- fusion_bench-0.2.3/fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +6 -0
- fusion_bench-0.2.3/fusion_bench_config/method/dare/simple_average.yaml +5 -0
- fusion_bench-0.2.3/fusion_bench_config/method/linear/expo.yaml +8 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/pyproject.toml +1 -2
- fusion_bench-0.2.2/fusion_bench/method/analysis/task_vector_cos_similarity.py +0 -98
- fusion_bench-0.2.2/fusion_bench/models/wrappers/ensemble.py +0 -90
- fusion_bench-0.2.2/fusion_bench/utils/__init__.py +0 -30
- fusion_bench-0.2.2/fusion_bench/utils/devices.py +0 -107
- fusion_bench-0.2.2/fusion_bench/utils/parameters.py +0 -126
- fusion_bench-0.2.2/fusion_bench_config/README.md +0 -4
- fusion_bench-0.2.2/fusion_bench_config/method/task_vector_cos_similarity.yaml +0 -3
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/LICENSE +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/method/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/method/base_algorithm.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/modelpool/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/modelpool/base_pool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/modelpool/huggingface_clip_vision.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/taskpool/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/taskpool/base_pool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/constants/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/constants/paths.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/clip_dataset.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/gpt2_glue.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/image_dataset.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/metrics → fusion_bench-0.2.3/fusion_bench/dataset/llama}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/ada_svd/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/adamerging/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/adamerging/clip_task_wise_adamerging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/adamerging/entropy_loss.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/adamerging/layer_wise_adamerging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/classification/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/classification/clip_finetune.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/concrete_subspace/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/concrete_subspace/clip_concrete_task_arithmetic.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dare/utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dawe/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dawe/warppers/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/dawe/warppers/dawe_model.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/depth_upscaling/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/depth_upscaling/depth_upscaling_for_llama.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/fisher_merging/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/fisher_merging/clip_fisher_merging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/linear/task_arithmetic_for_llama.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/models/linearized → fusion_bench-0.2.3/fusion_bench/method/lm_finetune}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/mixture_of_experts/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/mixture_of_experts/mixtral_merging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/ablate.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/data.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/eval.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/layerwrapper.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/prune.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/prune_opt.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pruning/wanda_utils/sparsegpt.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pwe_moe/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pwe_moe/module.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pwe_moe/phn/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pwe_moe/phn/solvers.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/pwe_moe/utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/regmean/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/regmean/clip_regmean.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/regmean/gpt2_regmean.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/slerp/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/slerp/slerp_utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/smile_upscaling/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/smile_upscaling/singular_projection_merging.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/sparse_we_moe/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/sparse_we_moe/sparse_we_moe.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/sparselo/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/task_arithmetic/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/ties_merging/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/ties_merging/ties_merging_utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/trust_region/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/trust_region/utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/we_moe/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/we_moe/clip_we_moe.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/we_moe/we_moe.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/method/weighted_average/__init__.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/models/nyuv2 → fusion_bench-0.2.3/fusion_bench/metrics}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/nyuv2/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/nyuv2/depth.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/nyuv2/loss.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/nyuv2/noise.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/nyuv2/normal.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/nyuv2/segmentation.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/text_to_image_generation/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/text_to_image_generation/aesthetic_scorer.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/text_to_image_generation/compressibility.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/metrics/text_to_image_generation/pickscore_scorer.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/mixins/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/mixins/clip_classification.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/mixins/lightning_fabric.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/models/smile_moe → fusion_bench-0.2.3/fusion_bench/mixins/optim}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/mixins/rich_live.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/mixins/serialization.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/mixins/simple_profiler.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/PeftModelForSeq2SeqLM.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/causal_lm/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/clip_vision/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/clip_vision/modelpool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/huggingface_automodel.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/huggingface_gpt2_classification.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/nyuv2_modelpool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/seq2seq_lm/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/modelpool/seq2seq_lm/modelpool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/hf_clip.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/models/wrappers → fusion_bench-0.2.3/fusion_bench/models/linearized}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/linearized/linearized_model_utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/linearized/vision_model.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/optim → fusion_bench-0.2.3/fusion_bench/models/llama/model_utils}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/masks/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/masks/mask_model.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_losparse_llama/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_losparse_llama/configuration_losparse_llama.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_losparse_llama/losparse_linear.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_losparse_llama/modeling_losparse_llama.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_losparse_llama/register.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_losparse_llama/utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_smile_mistral/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_smile_mistral/configuration_smile_mistral.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/modeling_smile_mistral/register.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/scripts → fusion_bench-0.2.3/fusion_bench/models/nyuv2}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/nyuv2/aspp.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/nyuv2/lightning_module.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/nyuv2/resnet.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/nyuv2/resnet_dilated.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/parameter_dict.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/separate_io.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/scripts/clip → fusion_bench-0.2.3/fusion_bench/models/smile_moe}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/smile_moe/linear.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/sparse_we_moe.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/we_moe.py +0 -0
- {fusion_bench-0.2.2/fusion_bench/tasks/flan_t5_text_generation → fusion_bench-0.2.3/fusion_bench/models/wrappers}/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/models/wrappers/task_wise_fusion.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/optim/mezo.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/programs/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/programs/base_program.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/scripts/cli.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/scripts/clip/convert_checkpoint.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/scripts/imgui.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/scripts/nyuv2_mtl_train.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/scripts/webui.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/base_pool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/clip_vision/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/gpt2_text_classification.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/taskpool/nyuv2_taskpool.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/base_task.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/classification.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/__init__.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/cifar10.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/cifar100.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/clip_dataset.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/dtd.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/eurosat.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/flower102.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/gtsrb.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/imagenet.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/mnist.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/oxford_iiit_pet.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/rendered_sst2.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/resisc45.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/stanford_cars.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/stl10.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/sun397.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/svhn.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/clip_classification/tiny_imagenet.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/flan_t5_text_generation/datasets_preprocess.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/auto.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/hydra_utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/json.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/lazy_imports.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/pylogger.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/rich_utils.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/state_dict_arithmetic.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/timer.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/utils/type.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench.egg-info/dependency_links.txt +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench.egg-info/entry_points.txt +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench.egg-info/top_level.txt +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/cifar10.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/cifar100.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/dtd.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/eurosat.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/gtsrb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/resisc45.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/stanford-cars.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/sun397.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/svhn.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/the_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/test/tiny-imagenet.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/cifar10.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/cifar100.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/dtd.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/eurosat.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/gtsrb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/resisc45.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/stanford-cars.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/sun397.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/svhn.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/the_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/train/tiny-imagenet.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/dtd.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/eurosat.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/gtsrb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/resisc45.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/stanford-cars.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/sun397.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/svhn.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/image_classification/val/the_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/question_answering/search_qa.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/question_answering/test/search_qa.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/question_answering/train/MetaMathQA.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/question_answering/train/search_qa.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/question_answering/val/search_qa.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/summarization/test/xsum.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/summarization/train/xsum.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/summarization/val/xsum.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/summarization/xsum.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/text_generation/test/gsm-hard.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/text_generation/test/gsm8k.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/text_generation/test/gsm8k_question_label.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/text_generation/train/CodeAlpaca-20k.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/text_generation/train/gsm8k.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/dataset/text_generation/train/gsm8k_question_label.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/fabric/auto.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/fabric_logger/tensorboard_logger.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/hydra/default.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/hydra/help/fusion_bench_help.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/hydra/job_logging/rich_logging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/llama_magnitude_pruning.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/llama_weighted_average.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/adamerging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/clip_finetune.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/concrete_subspace/clip_concrete_task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/dare/task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/depth_upscaling.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/dummy.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/ensemble/max_model_predictor.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/fisher_merging/fisher_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/linear/linear_interpolation.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/linear/simple_average_for_llama.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/linear/weighted_average.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/magnitude_diff_pruning.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/mixtral_moe_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/mixtral_moe_upscaling.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/model_recombination.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/pruning/llama_magnitude_pruning.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/pruning/llama_random_pruning.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/pruning/llama_wanda_pruning.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/pwe_moe_ls_for_clip.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/regmean/clip_regmean.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/regmean/regmean.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/simple_average.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/slerp/slerp.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/ties_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_dtd.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_sun397.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_svhn.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_dtd.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_sun397.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_svhn.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_dtd.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/clip-vit/generate_vit_model_config.sh +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-cola.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-cola_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mnli.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mrpc.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mrpc_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qnli.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qqp.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qqp_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-rte.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-rte_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-sst2.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-sst2_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-stsb.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-stsb_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-cola_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-mnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-mrpc_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-qnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-qqp_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-rte_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-sst2_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-stsb_lora-16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/model/flan-t5/generate_flan-t5.sh +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_generalization_exp1.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_generalization_exp2.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_mtl.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_clean.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/Seq2SeqLMPool/_template.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/automodelpool.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/nyuv2_modelpool.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/smile_mistral_exp_v1.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/smile_mistral_exp_v2.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/smile_mistral_exp_v3.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/modelpool/smile_mistral_exp_v4.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/nyuv2_config.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/nyuv2_mtl_train.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_L14.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_val.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_clean.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_corrupted.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/dummy.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/flan-t5_glue_text_generation.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench_config/taskpool/nyuv2_taskpool.yaml +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/setup.cfg +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/tests/test_depth_upscaling.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/tests/test_simple_average.py +0 -0
- {fusion_bench-0.2.2 → fusion_bench-0.2.3}/tests/test_weighed_ensemble.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: fusion_bench
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: A Comprehensive Benchmark of Deep Model Fusion
|
|
5
5
|
Author-email: Anke Tang <tang.anke@foxmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -33,7 +33,6 @@ Requires-Python: >=3.10
|
|
|
33
33
|
Description-Content-Type: text/markdown
|
|
34
34
|
License-File: LICENSE
|
|
35
35
|
Requires-Dist: hydra-core
|
|
36
|
-
Requires-Dist: torch>=2.0.0
|
|
37
36
|
Requires-Dist: lightning
|
|
38
37
|
Requires-Dist: transformers
|
|
39
38
|
Requires-Dist: datasets
|
|
@@ -68,6 +67,12 @@ FusionBench is a benchmark suite designed to evaluate the performance of various
|
|
|
68
67
|
|
|
69
68
|
Projects based on FusionBench:
|
|
70
69
|
|
|
70
|
+
<details>
|
|
71
|
+
<summary>Li Shen, Anke Tang, Enneng Yang et al. Efficient and Effective Weight-Ensembling Mixture of Experts for Multi-Task Model Merging. Oct, 2024. https://github.com/EnnengYang/Efficient-WEMoE</summary>
|
|
72
|
+
|
|
73
|
+
<img width="1018" alt="image" src="https://github.com/user-attachments/assets/b7e1279e-87fc-4016-8867-1bff7700e271">
|
|
74
|
+
|
|
75
|
+
</details>
|
|
71
76
|
<details>
|
|
72
77
|
<summary>Jinluan Yang et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. Oct, 2024. http://arxiv.org/abs/2410.13910</summary>
|
|
73
78
|
|
|
@@ -111,9 +116,21 @@ In this benchmark, we evaluate the performance of different fusion methods on a
|
|
|
111
116
|
The project is structured as follows:
|
|
112
117
|
|
|
113
118
|
- `fusion_bench/`: the main package of the benchmark.
|
|
119
|
+
- `method`: contains the implementation of the fusion methods.
|
|
120
|
+
> **naming convention**: `fusion_bench/method/{method_name}/{variant}.py` contains the implementation of the specific method or its variants.
|
|
121
|
+
For example, `fusion_bench/method/regmean/clip_regmean.py` contains the implementation of the RegMean algorithm for CLIP vision models.
|
|
122
|
+
- `modelpool`: contains the implementation of the model pool, responsible for managing the models and dataset to be loaded.
|
|
123
|
+
- `taskpool`: contains the implementation of the task pool, responsible for evaluating the performance of models returned by the algorithm.
|
|
114
124
|
- `config/`: configuration files for the benchmark. We use [Hydra](https://hydra.cc/) to manage the configurations.
|
|
125
|
+
- `method`: configuration files for the fusion methods.
|
|
126
|
+
> **naming convention**: `config/method/{method_name}/{variant}.yaml` contains the configuration for the specific method or its variants.
|
|
127
|
+
- `modelpool`: configuration files for the model pool.
|
|
128
|
+
- `taskpool`: configuration files for the task pool.
|
|
129
|
+
- `model`: configuration files for the models.
|
|
130
|
+
- `dataset`: configuration files for the datasets.
|
|
115
131
|
- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -r mkdocs-requirements.txt`.
|
|
116
132
|
- `examples/`: example scripts for running some of the experiments.
|
|
133
|
+
> **naming convention**: `examples/{method_name}/` contains the files such as bash scripts and jupyter notebooks for the specific method.
|
|
117
134
|
- `tests/`: unit tests for the benchmark.
|
|
118
135
|
|
|
119
136
|
## A Unified Command Line Interface
|
|
@@ -126,6 +143,9 @@ Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_
|
|
|
126
143
|
|
|
127
144
|
## Implement your own model fusion algorithm
|
|
128
145
|
|
|
146
|
+
First, create a new Python file for the algorithm in the `fusion_bench/method` directory.
|
|
147
|
+
Following the naming convention, the file should be named `{method_name_or_class}/{variant}.py`.
|
|
148
|
+
|
|
129
149
|
```python
|
|
130
150
|
from fusion_bench import BaseModelFusionAlgorithm, BaseModelPool
|
|
131
151
|
|
|
@@ -158,6 +178,9 @@ class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
|
|
|
158
178
|
A corresponding configuration file should be created to specify the class and hyperparameters of the algorithm.
|
|
159
179
|
Here we assume the configuration file is placed at `config/method/your_algorithm_config.yaml`.
|
|
160
180
|
|
|
181
|
+
> [!NOTE]
|
|
182
|
+
> In fact, you can place your implementation anywhere you like, as long as the `_target_` in the configuration file points to the correct class.
|
|
183
|
+
|
|
161
184
|
```yaml
|
|
162
185
|
_target_: path_to_the_module.DerivedModelFusionAlgorithm
|
|
163
186
|
|
|
{fusion_bench-0.2.2 → fusion_bench-0.2.3}/README.md
RENAMED
@@ -19,6 +19,12 @@ FusionBench is a benchmark suite designed to evaluate the performance of various
 
 Projects based on FusionBench:
 
+<details>
+<summary>Li Shen, Anke Tang, Enneng Yang et al. Efficient and Effective Weight-Ensembling Mixture of Experts for Multi-Task Model Merging. Oct, 2024. https://github.com/EnnengYang/Efficient-WEMoE</summary>
+
+<img width="1018" alt="image" src="https://github.com/user-attachments/assets/b7e1279e-87fc-4016-8867-1bff7700e271">
+
+</details>
 <details>
 <summary>Jinluan Yang et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. Oct, 2024. http://arxiv.org/abs/2410.13910</summary>
 
@@ -62,9 +68,21 @@ In this benchmark, we evaluate the performance of different fusion methods on a
 The project is structured as follows:
 
 - `fusion_bench/`: the main package of the benchmark.
+  - `method`: contains the implementation of the fusion methods.
+    > **naming convention**: `fusion_bench/method/{method_name}/{variant}.py` contains the implementation of the specific method or its variants.
+    For example, `fusion_bench/method/regmean/clip_regmean.py` contains the implementation of the RegMean algorithm for CLIP vision models.
+  - `modelpool`: contains the implementation of the model pool, responsible for managing the models and datasets to be loaded.
+  - `taskpool`: contains the implementation of the task pool, responsible for evaluating the performance of models returned by the algorithm.
 - `config/`: configuration files for the benchmark. We use [Hydra](https://hydra.cc/) to manage the configurations.
+  - `method`: configuration files for the fusion methods.
+    > **naming convention**: `config/method/{method_name}/{variant}.yaml` contains the configuration for the specific method or its variants.
+  - `modelpool`: configuration files for the model pool.
+  - `taskpool`: configuration files for the task pool.
+  - `model`: configuration files for the models.
+  - `dataset`: configuration files for the datasets.
 - `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -r mkdocs-requirements.txt`.
 - `examples/`: example scripts for running some of the experiments.
+  > **naming convention**: `examples/{method_name}/` contains files such as bash scripts and Jupyter notebooks for the specific method.
 - `tests/`: unit tests for the benchmark.
 
 ## A Unified Command Line Interface
@@ -77,6 +95,9 @@ Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_
 
 ## Implement your own model fusion algorithm
 
+First, create a new Python file for the algorithm in the `fusion_bench/method` directory.
+Following the naming convention, the file should be named `{method_name_or_class}/{variant}.py`.
+
 ```python
 from fusion_bench import BaseModelFusionAlgorithm, BaseModelPool
 
@@ -109,6 +130,9 @@ class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
 A corresponding configuration file should be created to specify the class and hyperparameters of the algorithm.
 Here we assume the configuration file is placed at `config/method/your_algorithm_config.yaml`.
 
+> [!NOTE]
+> In fact, you can place your implementation anywhere you like, as long as the `_target_` in the configuration file points to the correct class.
+
 ```yaml
 _target_: path_to_the_module.DerivedModelFusionAlgorithm
 
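A minimal sketch of how the `_target_` key is resolved at runtime, using Hydra's standard `instantiate` helper; the module path below is the placeholder from the README itself, so the call will only succeed once your class actually exists:

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Equivalent to loading config/method/your_algorithm_config.yaml.
cfg = OmegaConf.create(
    {"_target_": "path_to_the_module.DerivedModelFusionAlgorithm"}
)
algorithm = instantiate(cfg)  # imports the module and constructs the class
```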
{fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/__init__.py
RENAMED
@@ -13,7 +13,7 @@ from . import (
     tasks,
     utils,
 )
-from .method import BaseModelFusionAlgorithm
+from .method import BaseAlgorithm, BaseModelFusionAlgorithm
 from .modelpool import BaseModelPool
 from .models import separate_io
 from .taskpool import BaseTaskPool
{fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/taskpool/clip_image_classification.py
RENAMED
@@ -83,6 +83,12 @@ class CLIPImageClassificationTask(ClassificationTask):
     def evaluate(self, clip_model: CLIPModel):
         """
         Evaluate the model on the image classification task.
+
+        Args:
+            clip_model (CLIPModel): The CLIP model to evaluate.
+
+        Returns:
+            dict: A dictionary containing the evaluation results.
         """
         classifier = HFCLIPClassifier(
             clip_model=clip_model, processor=self._clip_processor
@@ -151,6 +157,12 @@ class CLIPImageClassificationTaskPool(TaskPool):
     def evaluate(self, model: CLIPVisionModel):
         """
         Evaluate the model on the image classification task.
+
+        Args:
+            model (CLIPVisionModel): The vision model to evaluate.
+
+        Returns:
+            dict: A dictionary containing the evaluation results for each task.
         """
         # if the fabric is not set, and we have a GPU, create a fabric instance
         if self._fabric is None and torch.cuda.is_available():
{fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py
RENAMED
@@ -149,6 +149,15 @@ class FlanT5GLUETextGenerationTaskPool(LightningFabricMixin, TaskPool):
             raise ValueError(f"Unknown task {task_config.name}")
 
     def evaluate(self, model: T5ForConditionalGeneration):
+        """
+        Evaluate the model on the FlanT5 GLUE text generation tasks.
+
+        Args:
+            model (T5ForConditionalGeneration): The model to evaluate.
+
+        Returns:
+            dict: A dictionary containing the evaluation results for each task.
+        """
         if not isinstance(model, T5ForConditionalGeneration):
             log.warning(
                 f"Model is not an instance of T5ForConditionalGeneration, but {type(model)}"
{fusion_bench-0.2.2 → fusion_bench-0.2.3}/fusion_bench/dataset/gsm8k.py
RENAMED
@@ -16,6 +16,9 @@ def load_gsm8k_question_label_data(
     {'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?',
     'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}
 
+    Args:
+        dataset_name (Literal["train", "test", "train_socratic", "test_socratic"]): The name of the dataset to load.
+
     Returns:
         questions (List[str]): List of questions.
         labels (List[float]): List of labels. For example, the label for the above example is `72.0`.
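The `72.0` in that docstring comes from the `#### <answer>` suffix that GSM8K solutions end with. A minimal sketch of extracting such a label; the helper name is illustrative, not the function defined in this module:

```python
def parse_gsm8k_label(answer: str) -> float:
    # GSM8K answers end with "#### <final answer>"; take the text after
    # the last marker and convert it to a float.
    final = answer.split("####")[-1].strip().replace(",", "")
    return float(final)


answer = "Natalia sold 48/2 = <<48/2=24>>24 clips in May.\n#### 72"
assert parse_gsm8k_label(answer) == 72.0
```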
fusion_bench-0.2.3/fusion_bench/dataset/imdb.py
ADDED
@@ -0,0 +1,11 @@
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+from datasets import load_dataset, load_from_disk
+from transformers import PreTrainedTokenizer
+from trl import SFTConfig, SFTTrainer
+
+import fusion_bench
+
+log = logging.getLogger(__name__)
fusion_bench-0.2.3/fusion_bench/dataset/llama/alpaca.py
ADDED
@@ -0,0 +1,142 @@
+import logging
+import os
+from typing import Any, Dict, List, Optional
+
+from datasets import Dataset, load_dataset, load_from_disk
+from transformers import PreTrainedTokenizer
+
+import fusion_bench
+
+log = logging.getLogger(__name__)
+
+
+def tokenize_alpaca_dataset(
+    dataset: Dataset,
+    tokenizer: PreTrainedTokenizer,
+    max_length: int = 2048,
+    input_template: str = "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n",
+    input_no_template: str = "### Instruction:\n{instruction}\n\n### Response:\n",
+    batch_size: int = 1000,
+) -> Dataset:
+    """
+    Tokenize an Alpaca-format dataset in batches, with customizable templates.
+
+    Args:
+        dataset: The input dataset in Alpaca format
+        tokenizer: The tokenizer to use
+        max_length: Maximum sequence length
+        input_template: Template for samples with an input field
+        input_no_template: Template for samples without an input field
+        batch_size: Size of batches to process at once
+
+    Returns:
+        Tokenized dataset
+    """
+
+    def prepare_samples(samples: Dict[str, List[str]]) -> Dict[str, List[List[int]]]:
+        # Format prompts based on whether the input field exists
+        prompts = []
+        for instruction, input_text in zip(
+            samples["instruction"], samples.get("input", [""] * len(samples["instruction"]))
+        ):
+            if input_text.strip():
+                prompt = input_template.format(
+                    instruction=instruction.strip(), input=input_text.strip()
+                )
+            else:
+                prompt = input_no_template.format(instruction=instruction.strip())
+            prompts.append(prompt)
+
+        responses = [output.strip() for output in samples["output"]]
+
+        # Tokenize prompts and responses
+        prompt_tokens = tokenizer(
+            prompts, add_special_tokens=False, padding=False, truncation=False
+        )
+        response_tokens = tokenizer(
+            responses, add_special_tokens=False, padding=False, truncation=False
+        )
+
+        input_ids, labels = [], []
+
+        # Process each sample in the batch
+        for prompt_toks, response_toks in zip(
+            prompt_tokens["input_ids"], response_tokens["input_ids"]
+        ):
+            # Create input_ids with EOS token
+            sample_input_ids = prompt_toks + response_toks + [tokenizer.eos_token_id]
+
+            # Create labels: -100 for prompt, actual tokens for response
+            label = [-100] * len(prompt_toks) + response_toks + [tokenizer.eos_token_id]
+
+            # Truncate if exceeds max length
+            if len(sample_input_ids) > max_length:
+                sample_input_ids = sample_input_ids[:max_length]
+                label = label[:max_length]
+
+            input_ids.append(sample_input_ids)
+            labels.append(label)
+
+        # Pad input_ids to `max_length` so they line up with the padded labels below
+        padded_results = tokenizer.pad(
+            {"input_ids": input_ids},
+            padding="max_length",
+            max_length=max_length,
+            return_attention_mask=True,
+        )
+
+        # Pad labels with -100
+        padded_labels = []
+        for label in labels:
+            padding_length = max_length - len(label)
+            if padding_length > 0:
+                label = label + [-100] * padding_length
+            padded_labels.append(label)
+
+        return {
+            "input_ids": padded_results["input_ids"],
+            "attention_mask": padded_results["attention_mask"],
+            "labels": padded_labels,
+        }
+
+    if tokenizer.pad_token is None:
+        log.warning("Tokenizer does not have a `pad_token`; setting it to the `eos_token`.")
+        tokenizer.pad_token = tokenizer.eos_token
+
+    # Process the entire dataset in batches
+    tokenized_dataset = dataset.map(
+        prepare_samples,
+        batched=True,
+        batch_size=batch_size,
+        remove_columns=dataset.column_names,
+        desc="Tokenizing dataset",
+    )
+
+    return tokenized_dataset
+
+
+def load_tokenized_alpaca_dataset_from_json(
+    data_files: str,
+    tokenizer: PreTrainedTokenizer,
+    max_length: int,
+    split: Optional[str] = "train",
+    cache_path: Optional[str] = None,
+):
+    if cache_path is not None and fusion_bench.utils.path.path_is_dir_and_not_empty(
+        cache_path
+    ):
+        datasets = load_from_disk(cache_path)
+        if split is None:
+            return datasets
+        else:
+            return datasets[split]
+    else:
+        assert (
+            tokenizer is not None
+        ), "Cached dataset not found. Need tokenizer to process the raw data."
+
+        dataset = load_dataset("json", data_files=data_files)
+        if split is not None:
+            dataset = dataset[split]
+        dataset = tokenize_alpaca_dataset(dataset, tokenizer, max_length=max_length)
+        return dataset
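A quick sanity check of `tokenize_alpaca_dataset` on an in-memory dataset; `gpt2` is just a small stand-in tokenizer for illustration, not what the benchmark itself uses:

```python
from datasets import Dataset
from transformers import AutoTokenizer

from fusion_bench.dataset.llama.alpaca import tokenize_alpaca_dataset

tokenizer = AutoTokenizer.from_pretrained("gpt2")
raw = Dataset.from_list(
    [
        {"instruction": "Add the numbers.", "input": "2 and 3", "output": "5"},
        {"instruction": "Say hello.", "input": "", "output": "Hello!"},
    ]
)
tokenized = tokenize_alpaca_dataset(raw, tokenizer, max_length=64)
# Prompt positions are masked with -100 in the labels.
print(tokenized[0]["input_ids"][:10], tokenized[0]["labels"][:10])
```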
fusion_bench-0.2.3/fusion_bench/dataset/llama/openai.py
ADDED
@@ -0,0 +1,160 @@
+import logging
+from typing import Dict, List
+
+from datasets import Dataset
+from transformers import PreTrainedTokenizer
+
+log = logging.getLogger(__name__)
+
+
+def tokenize_messages_dataset(
+    dataset: Dataset,
+    tokenizer: PreTrainedTokenizer,
+    max_length: int = 2048,
+    padding: bool = True,
+    system_template: str = "### System: {message}\n",
+    user_template: str = "## User: {message}\n",
+    assistant_template: str = "## Assistant: {message}\n",
+) -> Dataset:
+    R"""
+    Tokenize a messages-format dataset, supporting per-message loss-calculation flags.
+
+    Each sample is expected to have the following format:
+
+    Examples:
+
+    ```json
+    {
+        "messages": [
+            {
+                "role": "system",
+                "content": "XXX",
+                "calculate_loss": 0
+            },
+            {
+                "role": "system",
+                "content": "XXX",
+                "calculate_loss": 0
+            },
+            {
+                "role": "user",
+                "content": "XXX",
+                "calculate_loss": 0
+            },
+            {
+                "role": "assistant",
+                "content": "XXX",
+                "calculate_loss": 1
+            }
+        ],
+        "create_info": [
+            {
+                "date": "20240830",
+                "owner": "l00470783",
+                "within_source_id": 0,
+                "describe": "...",
+                "source": [
+                    "..."
+                ],
+                "language": "zh"
+            }
+        ],
+        "feature_info": {
+            "domain": "...",
+            "tags": [
+                "..."
+            ]
+        },
+        "source_file": "..."
+    }
+    ```
+
+    Args:
+        dataset: Input dataset in messages format
+        tokenizer: The tokenizer to use
+        max_length: Maximum sequence length
+        system_template: Template for system messages
+        user_template: Template for user messages
+        assistant_template: Template for assistant messages
+
+    Returns:
+        Tokenized dataset
+    """
+
+    def build_prompt(messages: List[Dict[str, str]]) -> tuple[str, str]:
+        """
+        Build the prompt and get the response that needs loss calculation.
+        Returns the conversation history and the response to calculate the loss on.
+        """
+        history = ""
+        response = ""
+
+        for message in messages:
+            role = message["role"]
+            content = message["content"].strip()
+            calculate_loss = message.get("calculate_loss", 0)
+
+            # Build conversation history
+            if role == "system":
+                history += system_template.format(message=content)
+            elif role == "user":
+                history += user_template.format(message=content)
+            elif role == "assistant":
+                if calculate_loss:
+                    # If this assistant message needs loss calculation,
+                    # save it as the response and don't add it to the history
+                    response = content
+                else:
+                    # Otherwise add to conversation history
+                    history += assistant_template.format(message=content)
+
+        return history, response
+
+    def prepare_sample(sample: dict) -> dict:
+        # Get conversation history and response
+        history, response = build_prompt(sample["messages"])
+
+        # Tokenize prompt and response
+        prompt_tokens = tokenizer.encode(history, add_special_tokens=False)
+        response_tokens = tokenizer.encode(response, add_special_tokens=False)
+
+        # Create input_ids with EOS token
+        input_ids = prompt_tokens + response_tokens + [tokenizer.eos_token_id]
+
+        # Create attention mask
+        attention_mask = [1] * len(input_ids)
+
+        # Create labels: -100 for prompt, actual tokens for response
+        labels = (
+            [-100] * len(prompt_tokens) + response_tokens + [tokenizer.eos_token_id]
+        )
+
+        # Truncate if exceeds max length
+        if len(input_ids) > max_length:
+            input_ids = input_ids[:max_length]
+            attention_mask = attention_mask[:max_length]
+            labels = labels[:max_length]
+
+        # Pad if necessary
+        if padding:
+            padding_length = max_length - len(input_ids)
+            if padding_length > 0:
+                input_ids.extend([tokenizer.pad_token_id] * padding_length)
+                attention_mask.extend([0] * padding_length)
+                labels.extend([-100] * padding_length)
+
+        return {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "labels": labels,
+        }
+
+    if tokenizer.pad_token is None:
+        log.warning("Tokenizer does not have a `pad_token`; setting it to the `eos_token`.")
+        tokenizer.pad_token = tokenizer.eos_token
+    # Process the dataset
+    tokenized_dataset = dataset.map(
+        prepare_sample, remove_columns=dataset.column_names, desc="Tokenizing dataset"
+    )
+
+    return tokenized_dataset
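A small end-to-end check of `tokenize_messages_dataset` on a toy conversation; as above, `gpt2` stands in for whatever tokenizer the pipeline actually uses:

```python
from datasets import Dataset
from transformers import AutoTokenizer

from fusion_bench.dataset.llama.openai import tokenize_messages_dataset

tokenizer = AutoTokenizer.from_pretrained("gpt2")
raw = Dataset.from_list(
    [
        {
            "messages": [
                {"role": "system", "content": "Be terse.", "calculate_loss": 0},
                {"role": "user", "content": "2 + 2?", "calculate_loss": 0},
                {"role": "assistant", "content": "4", "calculate_loss": 1},
            ]
        }
    ]
)
tokenized = tokenize_messages_dataset(raw, tokenizer, max_length=32)
# Prompt positions are masked with -100; only the flagged response
# contributes to the loss.
print(tokenized[0]["labels"])
```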
fusion_bench-0.2.3/fusion_bench/dataset/llama/sharegpt.py
ADDED
@@ -0,0 +1,141 @@
+import logging
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+from datasets import Dataset
+from transformers import PreTrainedTokenizer
+
+log = logging.getLogger(__name__)
+
+
+def tokenize_sharegpt_dataset(
+    dataset: Dataset,
+    tokenizer: PreTrainedTokenizer,
+    max_length: int = 2048,
+    padding: bool = True,
+    system_template: str = "### System: {system}\n\n",
+    tools_template: str = "### Tools: {tools}\n\n",
+    human_template: str = "### Human: {message}\n",
+    assistant_template: str = "### Assistant: {message}\n",
+    function_template: str = "### Function Call: {message}\n",
+    observation_template: str = "### Observation: {message}\n",
+) -> Dataset:
+    """
+    Tokenize a ShareGPT-format dataset with support for system prompts, tools, and tool calls.
+
+    Args:
+        dataset: Input dataset in ShareGPT format.
+        tokenizer: The tokenizer to use.
+        max_length: Maximum sequence length.
+        padding: Whether to pad the tokenized inputs to `max_length`.
+        system_template: Template for system messages.
+        tools_template: Template for tool descriptions.
+        human_template: Template for human messages.
+        assistant_template: Template for assistant responses.
+        function_template: Template for function calls.
+        observation_template: Template for function observations.
+
+    Returns:
+        Tokenized dataset.
+    """
+
+    def build_conversation(
+        conversations: List[Dict[str, str]],
+        system: Optional[str] = None,
+        tools: Optional[str] = None,
+    ) -> tuple[List[int], List[int]]:
+        """
+        Build prompt and response token ids from conversations.
+        Returns (prompt_tokens, response_tokens) for the last assistant message.
+        """
+        # Initialize conversation history
+        history = ""
+
+        # Add system prompt if provided
+        if system:
+            history += system_template.format(system=system.strip())
+
+        # Add tools description if provided
+        if tools:
+            history += tools_template.format(tools=tools.strip())
+
+        prompt_tokens = []
+        response_tokens = []
+
+        for i, message in enumerate(conversations):
+            msg_from = message["from"]
+            msg_value = message["value"].strip()
+
+            # If this is the last assistant message
+            if msg_from == "gpt" and i == len(conversations) - 1:
+                # Tokenize the current history as the prompt
+                prompt_tokens = tokenizer.encode(history, add_special_tokens=False)
+                # Tokenize the assistant's message as the response
+                response_tokens = tokenizer.encode(
+                    assistant_template.format(message=msg_value),
+                    add_special_tokens=False,
+                )
+                break
+
+            # Build conversation history
+            if msg_from == "human":
+                history += human_template.format(message=msg_value)
+            elif msg_from == "gpt":
+                history += assistant_template.format(message=msg_value)
+            elif msg_from == "function_call":
+                history += function_template.format(message=msg_value)
+            elif msg_from == "observation":
+                history += observation_template.format(message=msg_value)
+            else:
+                log.warning(f"Unknown role: {msg_from}")
+
+        return prompt_tokens, response_tokens
+
+    def prepare_sample(sample: dict) -> dict:
+        # Get prompt and response tokens
+        prompt_tokens, response_tokens = build_conversation(
+            conversations=sample["conversations"],
+            system=sample.get("system"),  # system prompt is optional
+            tools=sample.get("tools"),  # tools description is optional
+        )
+
+        # Create input_ids with EOS token
+        input_ids = prompt_tokens + response_tokens + [tokenizer.eos_token_id]
+
+        # Create attention mask (1 for tokens, 0 for padding)
+        attention_mask = [1] * len(input_ids)
+
+        # Create labels (-100 for prompt, actual tokens for response)
+        labels = (
+            [-100] * len(prompt_tokens) + response_tokens + [tokenizer.eos_token_id]
+        )
+
+        # Truncate if exceeds max length
+        if len(input_ids) > max_length:
+            input_ids = input_ids[:max_length]
+            attention_mask = attention_mask[:max_length]
+            labels = labels[:max_length]
+
+        # Pad if necessary
+        if padding:
+            padding_length = max_length - len(input_ids)
+            if padding_length > 0:
+                input_ids.extend([tokenizer.pad_token_id] * padding_length)
+                attention_mask.extend([0] * padding_length)
+                labels.extend([-100] * padding_length)
+
+        return {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "labels": labels,
+        }
+
+    if tokenizer.pad_token is None:
+        log.warning("Tokenizer does not have a `pad_token`; setting it to the `eos_token`.")
+        tokenizer.pad_token = tokenizer.eos_token
+    # Process the dataset
+    tokenized_dataset = dataset.map(
+        prepare_sample, remove_columns=dataset.column_names, desc="Tokenizing dataset"
+    )
+
+    return tokenized_dataset
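A sketch of calling `tokenize_sharegpt_dataset` on a toy conversation; `gpt2` is again an arbitrary small tokenizer used only for illustration:

```python
from datasets import Dataset
from transformers import AutoTokenizer

from fusion_bench.dataset.llama.sharegpt import tokenize_sharegpt_dataset

tokenizer = AutoTokenizer.from_pretrained("gpt2")
raw = Dataset.from_list(
    [
        {
            "system": "You are a helpful assistant.",
            "conversations": [
                {"from": "human", "value": "Name a prime number."},
                {"from": "gpt", "value": "7"},
            ],
        }
    ]
)
tokenized = tokenize_sharegpt_dataset(raw, tokenizer, max_length=32)
print(len(tokenized[0]["input_ids"]))  # padded to max_length
```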