fusion-bench 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. fusion_bench/compat/method/__init__.py +3 -1
  2. fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
  3. fusion_bench/constants/clip_vision.py +22 -0
  4. fusion_bench/dataset/clip_dataset.py +10 -2
  5. fusion_bench/dataset/gsm8k.py +2 -2
  6. fusion_bench/method/__init__.py +12 -2
  7. fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
  8. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
  9. fusion_bench/method/doge_ta/__init__.py +2 -0
  10. fusion_bench/method/{DOGE_TA → doge_ta}/clip_layer_wise_adamerging.py +1 -1
  11. fusion_bench/method/{DOGE_TA/DOGE_TA.py → doge_ta/doge_ta.py} +1 -1 (see the import note after this list)
  12. fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
  13. fusion_bench/method/gossip/__init__.py +3 -0
  14. fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
  15. fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
  16. fusion_bench/method/gossip/entropy_loss.py +25 -0
  17. fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
  18. fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
  19. fusion_bench/method/gossip/min_norm_solvers.py +227 -0
  20. fusion_bench/method/gossip/task_wise_gossip.py +265 -0
  21. fusion_bench/method/gossip/utils.py +74 -0
  22. fusion_bench/method/isotropic_merging/__init__.py +1 -1
  23. fusion_bench/method/opcm/opcm.py +102 -84
  24. fusion_bench/method/opcm/task_arithmetic.py +35 -21
  25. fusion_bench/method/opcm/ties_merging.py +71 -52
  26. fusion_bench/method/pwe_moe/module.py +1 -1
  27. fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
  28. fusion_bench/method/regmean/regmean.py +25 -17
  29. fusion_bench/method/smile_upscaling/__init__.py +1 -1
  30. fusion_bench/method/smile_upscaling/smile_upscaling.py +13 -10
  31. fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
  32. fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
  33. fusion_bench/method/ties_merging/ties_merging.py +36 -31
  34. fusion_bench/method/we_moe/we_moe.py +14 -15
  35. fusion_bench/mixins/__init__.py +6 -3
  36. fusion_bench/mixins/hydra_config.py +49 -0
  37. fusion_bench/mixins/openclip_classification.py +11 -0
  38. fusion_bench/mixins/simple_profiler.py +4 -2
  39. fusion_bench/modelpool/__init__.py +3 -1
  40. fusion_bench/modelpool/base_pool.py +2 -2
  41. fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
  42. fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
  43. fusion_bench/models/open_clip/__init__.py +6 -0
  44. fusion_bench/models/open_clip/modeling.py +176 -0
  45. fusion_bench/models/open_clip/utils.py +311 -0
  46. fusion_bench/models/open_clip/variables_and_paths.py +56 -0
  47. fusion_bench/models/parameter_dict.py +54 -13
  48. fusion_bench/models/wrappers/layer_wise_fusion.py +1 -46
  49. fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py +4 -119
  50. fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
  51. fusion_bench/taskpool/__init__.py +5 -3
  52. fusion_bench/taskpool/clip_vision/__init__.py +1 -0
  53. fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
  54. fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
  55. fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
  56. fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
  57. fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
  58. fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
  59. fusion_bench/taskpool/gpt2_text_classification.py +30 -1
  60. fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
  61. fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
  62. fusion_bench/utils/data.py +12 -0
  63. fusion_bench/utils/devices.py +14 -0
  64. fusion_bench/utils/instantiate.py +12 -0
  65. fusion_bench/utils/misc.py +9 -2
  66. fusion_bench/utils/packages.py +14 -0
  67. fusion_bench/utils/parameters.py +1 -1
  68. fusion_bench/utils/tensorboard.py +1 -1
  69. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/METADATA +15 -2
  70. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/RECORD +198 -158
  71. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL +1 -1
  72. fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
  73. fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
  74. fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
  75. fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
  76. fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
  77. fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
  78. fusion_bench_config/fabric/auto.yaml +0 -1
  79. fusion_bench_config/fabric/llama_ddp.yaml +0 -1
  80. fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
  81. fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
  82. fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
  83. fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
  84. fusion_bench_config/fabric_model_fusion.yaml +0 -1
  85. fusion_bench_config/llama_full_finetune.yaml +0 -2
  86. fusion_bench_config/llama_model_fusion.yaml +0 -2
  87. fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
  88. fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
  89. fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
  90. fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
  91. fusion_bench_config/method/adamerging.yaml +2 -2
  92. fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
  93. fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
  94. fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
  95. fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
  96. fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
  97. fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
  98. fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
  99. fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
  100. fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
  101. fusion_bench_config/method/dare/simple_average.yaml +0 -1
  102. fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
  103. fusion_bench_config/method/dare/ties_merging.yaml +0 -2
  104. fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
  105. fusion_bench_config/method/{DOGE_TA/DOGE_TA.yaml → doge_ta/doge_ta.yaml} +1 -1
  106. fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
  107. fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
  108. fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
  109. fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
  110. fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
  111. fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
  112. fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
  113. fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
  114. fusion_bench_config/method/linear/llama_expo.yaml +0 -3
  115. fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
  116. fusion_bench_config/method/linear/weighted_average.yaml +0 -1
  117. fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
  118. fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
  119. fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
  120. fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
  121. fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
  122. fusion_bench_config/method/model_recombination.yaml +0 -1
  123. fusion_bench_config/method/opcm/opcm.yaml +0 -1
  124. fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
  125. fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
  126. fusion_bench_config/method/opcm/weight_average.yaml +0 -1
  127. fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
  128. fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
  129. fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
  130. fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
  131. fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
  132. fusion_bench_config/method/slerp/slerp.yaml +0 -2
  133. fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
  134. fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
  135. fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
  136. fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
  137. fusion_bench_config/method/task_arithmetic.yaml +1 -1
  138. fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
  139. fusion_bench_config/method/ties_merging.yaml +1 -1
  140. fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
  141. fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
  142. fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
  143. fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
  144. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
  145. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
  146. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
  147. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
  148. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
  149. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
  150. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
  151. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
  152. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
  153. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
  154. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
  155. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
  156. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
  157. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
  158. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
  159. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
  160. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
  161. fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
  162. fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
  163. fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
  164. fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
  165. fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
  166. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
  167. fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
  168. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
  169. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
  170. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
  171. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
  172. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
  173. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
  174. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
  175. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
  176. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
  177. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
  178. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +8 -10
  179. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +66 -0
  180. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
  181. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
  182. fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
  183. fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
  184. fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
  185. fusion_bench_config/nyuv2_config.yaml +0 -2
  186. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
  187. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
  188. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
  189. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
  190. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
  191. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
  192. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
  193. fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
  194. fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
  195. fusion_bench/method/DOGE_TA/__init__.py +0 -2
  196. fusion_bench/method/{DOGE_TA → doge_ta}/layer_wise_adamerging.py +0 -0
  197. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/entry_points.txt +0 -0
  198. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info/licenses}/LICENSE +0 -0
  199. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/top_level.txt +0 -0
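
Note on the DOGE_TA → doge_ta rename (entries 10, 11, 105, and 196 above): code importing the method through the old module path will break on 0.2.13. Below is a minimal compatibility sketch, assuming the algorithm class keeps its name across the rename; its top-level re-export (`_target_: fusion_bench.method.DOGE_TA_Algorithm` in fusion_bench_config/method/doge_ta/doge_ta.yaml) is unchanged in this diff.

# Hedged sketch: tolerate the DOGE_TA -> doge_ta module rename.
# The submodule paths mirror the renamed files in the list above;
# the class name DOGE_TA_Algorithm is assumed unchanged by the rename.
try:
    from fusion_bench.method.doge_ta.doge_ta import DOGE_TA_Algorithm  # >= 0.2.13
except ImportError:
    from fusion_bench.method.DOGE_TA.DOGE_TA import DOGE_TA_Algorithm  # <= 0.2.11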

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.2)
+ Generator: setuptools (80.3.1)
  Root-Is-Purelib: true
  Tag: py3-none-any

fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml
@@ -3,10 +3,9 @@ defaults:
  - fabric: auto
  # --- Model, Method, Task ---
  - modelpool: CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted
- - method: dummy
+ - method: dummy # change this to the method you want to use
  - taskpool: CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted
  - _self_
-
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
  fast_dev_run: false # Run a single batch of data to test the model or method

fusion_bench_config/dataset/image_classification/test/TALL20.yaml
@@ -25,4 +25,3 @@ defaults:
  - emnist_letters
  - kmnist
  - rendered-sst2
-

fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml
@@ -2,4 +2,3 @@ emnist_letters:
  _target_: datasets.load_dataset
  path: tanganke/emnist_letters
  split: test
-

fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml
@@ -1,4 +1,4 @@
  fashion_mnist:
  _target_: datasets.load_dataset
  path: zalando-datasets/fashion_mnist
- split: test
+ split: test
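
For context on what these dataset entries mean: each is a Hydra/OmegaConf `_target_` spec, so instantiating the fashion_mnist block above amounts to the call below (a sketch; the remaining keys map directly onto the Hugging Face datasets API named in `_target_`).

# What instantiating the fashion_mnist config above performs:
# `_target_: datasets.load_dataset`, with the other keys passed as kwargs.
from datasets import load_dataset

ds = load_dataset(path="zalando-datasets/fashion_mnist", split="test")
print(ds[0])  # one test example, e.g. an "image" and a "label" field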

fusion_bench_config/dataset/image_classification/train/TALL20.yaml
@@ -25,4 +25,3 @@ defaults:
  - emnist_letters
  - kmnist
  - rendered-sst2
-

fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml
@@ -1,4 +1,4 @@
  fashion_mnist:
  _target_: datasets.load_dataset
  path: zalando-datasets/fashion_mnist
- split: train
+ split: train

fusion_bench_config/fabric/auto.yaml
@@ -1,7 +1,6 @@
  defaults:
  - loggers: tensorboard_logger
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/llama_ddp.yaml
@@ -1,7 +1,6 @@
  defaults:
  - loggers: tensorboard_logger
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/llama_fsdp.yaml
@@ -2,7 +2,6 @@ defaults:
  - loggers: tensorboard_logger
  - strategy: llama_fsdp
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/llama_peft_fsdp.yaml
@@ -2,7 +2,6 @@ defaults:
  - loggers: tensorboard_logger
  - strategy: llama_peft_fsdp
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/strategy/deepspeed.yaml
@@ -1,6 +1,5 @@
  # https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy
  _target_: lightning.fabric.strategies.DeepSpeedStrategy
-
  accelerator: null
  zero_optimization: true
  stage: 2

fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml
@@ -6,4 +6,3 @@ auto_wrap_policy:
  _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy
  activation_checkpointing_policy: ${.auto_wrap_policy}
  # limit_all_gathers: true
-

fusion_bench_config/fabric_model_fusion.yaml
@@ -6,7 +6,6 @@ defaults:
  - method: dummy
  - taskpool: dummy
  - _self_
-
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
  fast_dev_run: false # Run a single batch of data to test the model or method

fusion_bench_config/llama_full_finetune.yaml
@@ -6,10 +6,8 @@ defaults:
  - modelpool: CausalLMPool/llama_alpaca_cleaned.yaml
  - taskpool: dummy
  - _self_
-
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
-
  fast_dev_run: false # Run a single batch of data to test the model or method
  # Run the script without actually running the experiment, use with `print_config=true`.
  # You can also use `--cfg` or `-c` to show the configuration instead of running.

fusion_bench_config/llama_model_fusion.yaml
@@ -3,13 +3,11 @@ defaults:
  - override modelpool: CausalLMPool/single_llama_model
  - override taskpool: dummy
  - _self_
-
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
  merged_model_save_kwargs:
  save_tokenizer: true
  # tokenizer_kwargs:
  # unk_token: "<s>" # https://github.com/huggingface/transformers/issues/24318#issuecomment-1596801322
-
  modelpool:
  model_kwargs:
  torch_dtype: float16

fusion_bench_config/method/ada_svd/clip_vision.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.AdaSVDMergingForCLIPVisionModel
-
  scaling_factor: null
  num_samples: 256
  gate_k: 16

fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml
@@ -1,23 +1,18 @@
  _target_: fusion_bench.method.adamerging.flan_t5_layer_wise_adamerging.FlanT5LayerWiseAdaMergingAlgorithm
  _recursive_: false
-
  optimizer:
  _target_: torch.optim.Adam
  lr: 1e-3
-
  dataloader_kwargs:
  batch_size: 4
  num_workers: 0
-
  init_values: 0.3
  max_steps: 1000
  # if `merging_weights_path` is specified, the merging weights will be loaded from the file and skip the training process
  merging_weights_load_path: null
  merging_weights_save_path: null
-
  variant: null
  clamp_weights: false
  tie_weights: false
  strict: false
-
  cache_dir: "outputs/cache"

fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml
@@ -1,23 +1,18 @@
  _target_: fusion_bench.method.adamerging.gpt2_layer_wise_adamerging.GPT2LayerWiseAdaMergingAlgorithm
  _recursive_: false
-
  optimizer:
  _target_: torch.optim.Adam
  lr: 1e-4
-
  dataloader_kwargs:
  batch_size: 16
  num_workers: 0
-
  init_values: 0.3
  max_steps: 1000
  # if `merging_weights_path` is specified, the merging weights will be loaded from the file and skip the training process
  merging_weights_load_path: null
  merging_weights_save_path: null
-
  variant: null
  clamp_weights: false
  tie_weights: true
  strict: false
-
  cache_dir: "outputs/cache"

fusion_bench_config/method/adamerging/llama_sft.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.adamerging.llama_adamerging.LayerWiseAdaMergingForLlamaSFT
-
  seed: 0
  output_dir: null
  # path to initialize the merging weights
@@ -26,7 +25,6 @@ fast_dev_run: ${fast_dev_run}
  # the path for saving the merging weights
  save_interval: 100
  save_merged_model: true
-
  dataloader_kwargs:
  batch_size: 24
  num_workers: 0

fusion_bench_config/method/adamerging.yaml
@@ -1,5 +1,5 @@
- # this option can be "clip_task_wise_adamerging"
- name: ???
+ # this option can be one of "clip_task_wise_adamerging" or "clip_layer_wise_adamerging"
+ name: clip_layer_wise_adamerging
  # this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
  # if weights is specified, skip the test-time adaptation training
  weights: null
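
The change from `name: ???` to a concrete default above is more than cosmetic: in OmegaConf (which Hydra uses for these configs), `???` marks a mandatory missing value that raises on access, so 0.2.11 forced users to pick a variant explicitly while 0.2.13 defaults to layer-wise AdaMerging. A minimal sketch of the old behavior:

# Sketch of why `name: ???` required an explicit override before 0.2.13:
# OmegaConf treats ??? as a mandatory missing value.
from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"name": "???"})
assert OmegaConf.is_missing(cfg, "name")
try:
    _ = cfg.name
except MissingMandatoryValue:
    print("name must be set before use")  # 0.2.13 now defaults it instead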

fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml
@@ -3,4 +3,3 @@ plot_heatmap: true
  trainable_only: true
  max_points_per_model: null
  output_path: null
-

fusion_bench_config/method/analysis/task_vector_violin_plot.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.TaskVectorViolinPlot
-
  trainable_only: true
  max_points_per_model: 1000
  fig_kwargs: null

fusion_bench_config/method/classification/clip_continual_finetune.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.ContinualImageClassificationFineTuningForCLIP
-
  seed: 42
  # shuffle the order of the tasks
  shuffle_order: true

fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml
@@ -3,7 +3,6 @@ name: clip_concrete_layer_wise_adamerging
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  merge_dtype: null
  optimizer: adam
  lr: 1e-3

fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml
@@ -3,7 +3,6 @@ name: clip_concrete_task_wise_adamerging
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  merge_dtype: null
  optimizer: adam
  lr: 1e-3

fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml
@@ -1,38 +1,27 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_post_defense_AWM
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
-
  scaling_factor: 0.3
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.01
-
-
  max_steps: 2000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml
@@ -1,41 +1,30 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_post_defense_SAU
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
-
  scaling_factor: 0.3
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.01
  shared_weight: 0.01
  beta1: 0.5
  beta2: 0.5
-
-
  max_steps: 2000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml
@@ -1,39 +1,30 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_safe_concrete_layer_wise_adamerging
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
  base_lr: 1
  adamerging_lr: 1e-3
-
  scaling_factor: 0.3
-
  max_steps: 1000
  max_adamerging_steps: 1000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.1
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml
@@ -1,40 +1,27 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_safe_concrete_task_arithmetic
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
-
  scaling_factor: 0.3
-
-
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.1
-
-
  max_steps: 2000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/dare/simple_average.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.DareSimpleAverage
-
  sparsity_ratio: 0.5
  only_on_linear_weights: false
  rescale: true

fusion_bench_config/method/dare/task_arithmetic.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.DareTaskArithmetic
-
  scaling_factor: 0.3
  sparsity_ratio: 0.5
  only_on_linear_weights: false

fusion_bench_config/method/dare/ties_merging.yaml
@@ -1,10 +1,8 @@
  _target_: fusion_bench.method.dare.DareTiesMerging
-
  # === DARE parameters ===
  sparsity_ratio: 0.5
  only_on_linear_weights: false
  rescale: true
-
  # === Ties merging parameters ===
  # Scaling factor $\lambda$
  scaling_factor: 0.5

fusion_bench_config/method/dawe/dawe_for_clip.yaml
@@ -4,7 +4,6 @@ merge_mode: task_wise
  init_lambda: 0.3
  batch_reduce: true
  eval_batch_reduce: false
-
  _dict_feature_extractor_path: microsoft/resnet-18
  dict_processor:
  _target_: fusion_bench.method.dawe.dawe_for_clip.load_resnet_processor
@@ -18,14 +17,12 @@ gate_hidden_layers: 1
  # if task_vector_dtype is null, the task vector will have the same dtype as pretrained model
  task_vector_dtype: null
  task_vector_sparsity: 0
-
  # training & logging args
  max_steps: 1000
  save_interval: 200
  learning_rate: 1e-5
  resume_checkpoint_path: null
  skip_training: false
-
  # dataloader args
  batch_size: 1
  num_workers: 0

fusion_bench_config/method/doge_ta/doge_ta.yaml
@@ -1,4 +1,4 @@
  _target_: fusion_bench.method.DOGE_TA_Algorithm
  subspace: 6
  K: 30
- lamda: 0.07
+ lamda: 0.07

fusion_bench_config/method/ensemble/max_model_predictor.yaml
@@ -1 +1 @@
- _target_: fusion_bench.method.MaxModelPredictorAlgorithm
+ _target_: fusion_bench.method.MaxModelPredictorAlgorithm

fusion_bench_config/method/ensemble/simple_ensemble.yaml
@@ -1,2 +1 @@
  _target_: fusion_bench.method.SimpleEnsembleAlgorithm
-

fusion_bench_config/method/ensemble/weighted_ensemble.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.WeightedEnsembleAlgorithm
-
  normalize: true
  # this should be a list of floats, one for each model in the ensemble
  # If weights is null, the ensemble will use the default weights, which are equal weights for all models.

fusion_bench_config/method/gossip/layer_wise_clip.yaml (new file)
@@ -0,0 +1,30 @@
+ # this option can be "clip_task_wise_gossip"
+ name: clip_layer_wise_gossip
+ # _target_: fusion_bench.method.CLIPLayerWiseGossipAlgorithm
+ # this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
+ # if weights is specified, skip the test-time adaptation training
+ weights: null
+ # learning rate
+ optimizer: adam
+ lr: 1e-3
+ init_values: 0.3
+ # if `clamp_weights` is true, the weights will be clamped to [0, 1]
+ clamp_weights: false
+ # arguments of `functional_call`
+ tie_weights: true
+ strict: false
+ # this is overrided by `fabric.devices` if launched from the `fusion_bench` CLI.
+ devices: 1
+ batch_size: 16
+ num_workers: 8
+ max_steps: 400 # 1000
+ fast_dev_run: ${fast_dev_run}
+ # the path for saving the merging weights
+ save_merging_weights: 'merging_weights.pt'
+ cache_dir: outputs
+ # this is the parameter about gossip
+ gossip_max_steps: 20
+ gossip_skip_adamerging: false
+ accuracy_test_interval: 0 # if this value is equal to 1, we will evaluate all models each time after Gossip
+ improve_dataset: true
+ topo: ring
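
To try the new Gossip method, this file can be composed like any other `method` group entry. A hedged sketch using Hydra's compose API (the config directory path is a placeholder for a local checkout, and the sketch assumes the remaining defaults of `fabric_model_fusion`, shown earlier in this diff, resolve without further overrides):

# Hedged sketch: compose the new gossip config as a `method` group override.
# "/path/to/fusion_bench_config" is a placeholder for a local checkout.
from hydra import compose, initialize_config_dir

with initialize_config_dir(config_dir="/path/to/fusion_bench_config", version_base=None):
    cfg = compose(
        config_name="fabric_model_fusion",
        overrides=["method=gossip/layer_wise_clip"],
    )
print(cfg.method.name)  # clip_layer_wise_gossip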

fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml (new file)
@@ -0,0 +1,25 @@
+ _target_: fusion_bench.method.gossip.flan_t5_layer_wise_gossip.FlanT5LayerWiseGossipAlgorithm
+ _recursive_: false
+ optimizer:
+ _target_: torch.optim.Adam
+ lr: 1e-3
+ dataloader_kwargs:
+ batch_size: 4
+ num_workers: 0
+ init_values: 0.3
+ max_steps: 400
+ # if `merging_weights_path` is specified, the merging weights will be loaded from the file and skip the training process
+ merging_weights_load_path: null
+ merging_weights_save_path: null
+ variant: null
+ clamp_weights: false
+ tie_weights: false
+ strict: false
+ weights: null
+ cache_dir: "outputs/cache"
+ # this is the parameter about gossip
+ gossip_max_steps: 20
+ gossip_skip_adamerging: false
+ accuracy_test_interval: 0 #if this value is equal to 1, we will evaluate all models each time after Gossip [1, 5, 10, 15, 20] it can also be a list #
+ improve_dataset: true
+ topo: ring

fusion_bench_config/method/isotropic_merging/iso_c.yaml
@@ -1,4 +1,3 @@
  _target_: fusion_bench.method.ISO_C_Merge
-
  scaling_factor: 1.0
  exclude_keys: null

fusion_bench_config/method/isotropic_merging/iso_cts.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.ISO_CTS_Merge
-
  scaling_factor: 1.0
  common_space_fraction: 0.8
  exclude_keys: null

fusion_bench_config/method/linear/linear_interpolation.yaml
@@ -1,3 +1,2 @@
  _target_: fusion_bench.method.LinearInterpolationAlgorithm
-
  t: 0.5

fusion_bench_config/method/linear/llama_expo.yaml
@@ -7,13 +7,10 @@
  _target_: fusion_bench.method.ExPOAlgorithmForLlama
  extrapolation_factor: 0.1
  attention_scaling_factor: 1.0
-
  only_on_backbone: true
  on_linear_weights: true
  on_linear_bias: false
  on_embedding: false
-
  fix_last_n_layers: 0
  fix_first_n_layers: 0
-
  magnitude_sparsity_ratio: null

fusion_bench_config/method/linear/llama_expo_with_dare.yaml
@@ -1,18 +1,13 @@
  _target_: fusion_bench.method.linear.llama_expo.ExPOWithDareForLLama
-
  extrapolation_factor: 0.1
  attention_scaling_factor: 1.0
-
  only_on_backbone: true
  on_linear_weights: true
  on_linear_bias: false
  on_embedding: false
-
  fix_last_n_layers: 0
  fix_first_n_layers: 0
-
  magnitude_sparsity_ratio: null
-
  # dare arguments
  dare_sparsity_ratio: 0.5
  dare_only_on_linear_weights: true

fusion_bench_config/method/linear/weighted_average.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.WeightedAverageAlgorithm
-
  normalize: true # if true, the weights will be normalized before merging
  weights: # List of weights for each model
  - 0.5

fusion_bench_config/method/linear/weighted_average_for_llama.yaml
@@ -1,5 +1,4 @@
  _target_: WeightedAverageForLLama
-
  normalize: true # if true, the weights will be normalized before merging
  weights: # List of weights for each model
  - 0.5

fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml
@@ -1,12 +1,10 @@
  _target_: fusion_bench.method.BradleyTerryRewardModeling
  _recursive_: False
-
  optimizer:
  _target_: torch.optim.AdamW
  lr: 1e-5
  weight_decay: 0.01
  fused: null
-
  lr_scheduler:
  _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
  T_max: _T_max_ # this will be replaced by the expected number of training steps
@@ -14,13 +12,11 @@ lr_scheduler:
  warmup_steps: 100
  max_lr: ${..optimizer.lr}
  min_lr: 1e-6
-
  dataloader_kwargs:
  # per-gpu batch size
  batch_size: 1
  num_workers: 0
  pin_memory: True
-
  # Training hyperparameters
  # if max_epochs=-1, max_steps will be used to determine the number of training steps
  max_epochs: 3
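
The `max_lr: ${..optimizer.lr}` line above uses OmegaConf's relative interpolation: from `lr_scheduler`, `..` walks up to the config root, so the scheduler's ceiling always tracks the optimizer's learning rate. A minimal sketch:

# Minimal sketch of the relative interpolation used in bradley_terry_rm.yaml:
# from `lr_scheduler`, `..` refers to the parent node, so `${..optimizer.lr}`
# resolves to the optimizer's learning rate.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
optimizer:
  lr: 1e-5
lr_scheduler:
  max_lr: ${..optimizer.lr}
"""
)
assert cfg.lr_scheduler.max_lr == cfg.optimizer.lr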