fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. fusion_bench/__init__.py +1 -0
  2. fusion_bench/_get_started/__init__.py +3 -0
  3. fusion_bench/_get_started/greeting_program.py +49 -0
  4. fusion_bench/compat/method/base_algorithm.py +14 -0
  5. fusion_bench/constants/__init__.py +5 -0
  6. fusion_bench/constants/clip_vision.py +26 -2
  7. fusion_bench/constants/paths.py +4 -0
  8. fusion_bench/dataset/clip_dataset.py +2 -1
  9. fusion_bench/dataset/gpt2_glue.py +9 -9
  10. fusion_bench/dataset/image_corruption/__init__.py +0 -0
  11. fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
  12. fusion_bench/dataset/image_dataset.py +1 -1
  13. fusion_bench/dataset/nyuv2.py +2 -2
  14. fusion_bench/method/__init__.py +16 -1
  15. fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
  16. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
  17. fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
  18. fusion_bench/method/base_algorithm.py +195 -12
  19. fusion_bench/method/bitdelta/__init__.py +4 -0
  20. fusion_bench/method/bitdelta/bitdelta.py +156 -0
  21. fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
  22. fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
  23. fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
  24. fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
  25. fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
  26. fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
  27. fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
  28. fusion_bench/method/doge_ta/doge_ta.py +1 -1
  29. fusion_bench/method/ensemble.py +12 -12
  30. fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
  31. fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -2
  32. fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
  33. fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
  34. fusion_bench/method/fw_merging/fw_hard.py +1 -1
  35. fusion_bench/method/fw_merging/fw_soft.py +1 -1
  36. fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
  37. fusion_bench/method/linear/expo.py +2 -1
  38. fusion_bench/method/linear/linear_interpolation.py +6 -4
  39. fusion_bench/method/linear/simple_average_for_llama.py +16 -6
  40. fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
  41. fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
  42. fusion_bench/method/model_recombination.py +2 -5
  43. fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
  44. fusion_bench/method/moe_pruner/utils/data.py +2 -1
  45. fusion_bench/method/moe_pruner/utils/prune.py +6 -1
  46. fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
  47. fusion_bench/method/pruning/wanda_utils/data.py +1 -2
  48. fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
  49. fusion_bench/method/randes/modelsoup.py +1 -3
  50. fusion_bench/method/regmean/clip_regmean.py +2 -2
  51. fusion_bench/method/regmean/gpt2_regmean.py +3 -10
  52. fusion_bench/method/regmean/regmean.py +2 -11
  53. fusion_bench/method/regmean_plusplus/__init__.py +3 -0
  54. fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +199 -0
  55. fusion_bench/method/regmean_plusplus/regmean_plusplus.py +383 -0
  56. fusion_bench/method/simple_average.py +16 -4
  57. fusion_bench/method/slerp/slerp.py +5 -2
  58. fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
  59. fusion_bench/method/smile_upscaling/projected_energy.py +145 -0
  60. fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +39 -28
  61. fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
  62. fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
  63. fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
  64. fusion_bench/method/ties_merging/ties_merging.py +13 -26
  65. fusion_bench/method/we_moe/clip_we_moe.py +5 -4
  66. fusion_bench/method/we_moe/we_moe.py +6 -6
  67. fusion_bench/method/weighted_average/llama.py +4 -16
  68. fusion_bench/metrics/continual_learning/__init__.py +1 -0
  69. fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
  70. fusion_bench/metrics/nyuv2/__init__.py +2 -2
  71. fusion_bench/metrics/nyuv2/segmentation.py +1 -1
  72. fusion_bench/mixins/__init__.py +10 -2
  73. fusion_bench/mixins/clip_classification.py +4 -3
  74. fusion_bench/mixins/hydra_config.py +105 -7
  75. fusion_bench/mixins/lightning_fabric.py +2 -0
  76. fusion_bench/mixins/serialization.py +265 -48
  77. fusion_bench/modelpool/__init__.py +2 -2
  78. fusion_bench/modelpool/base_pool.py +29 -9
  79. fusion_bench/modelpool/causal_lm/causal_lm.py +9 -0
  80. fusion_bench/modelpool/clip_vision/modelpool.py +43 -12
  81. fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
  82. fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
  83. fusion_bench/models/__init__.py +2 -1
  84. fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
  85. fusion_bench/models/hf_utils.py +182 -0
  86. fusion_bench/models/linearized/linearized_model_utils.py +4 -4
  87. fusion_bench/models/linearized/vision_model.py +1 -1
  88. fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
  89. fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
  90. fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
  91. fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
  92. fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
  93. fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
  94. fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
  95. fusion_bench/models/modeling_smile_llama/__init__.py +0 -0
  96. fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
  97. fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +705 -0
  98. fusion_bench/models/modeling_smile_llama/register.py +8 -0
  99. fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
  100. fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
  101. fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +6 -7
  102. fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
  103. fusion_bench/models/parameter_dict.py +1 -1
  104. fusion_bench/models/sparse_we_moe.py +1 -53
  105. fusion_bench/models/utils.py +26 -0
  106. fusion_bench/models/we_moe.py +1 -53
  107. fusion_bench/models/wrappers/ensemble.py +6 -4
  108. fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
  109. fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
  110. fusion_bench/programs/base_program.py +81 -2
  111. fusion_bench/programs/fabric_fusion_program.py +24 -8
  112. fusion_bench/scripts/cli.py +6 -6
  113. fusion_bench/taskpool/base_pool.py +4 -3
  114. fusion_bench/taskpool/clip_vision/taskpool.py +34 -18
  115. fusion_bench/taskpool/dummy.py +1 -1
  116. fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
  117. fusion_bench/tasks/clip_classification/__init__.py +6 -4
  118. fusion_bench/utils/__init__.py +6 -1
  119. fusion_bench/utils/devices.py +14 -4
  120. fusion_bench/utils/instantiate_utils.py +3 -1
  121. fusion_bench/utils/misc.py +48 -2
  122. fusion_bench/utils/modelscope.py +265 -0
  123. fusion_bench/utils/parameters.py +2 -2
  124. fusion_bench/utils/rich_utils.py +3 -0
  125. fusion_bench/utils/state_dict_arithmetic.py +34 -27
  126. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/METADATA +31 -24
  127. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/RECORD +189 -153
  128. fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
  129. fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
  130. fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
  131. fusion_bench_config/_get_started/greeting_program.yaml +4 -0
  132. fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
  133. fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
  134. fusion_bench_config/fabric_model_fusion.yaml +45 -17
  135. fusion_bench_config/hydra/default.yaml +6 -2
  136. fusion_bench_config/llama_full_finetune.yaml +1 -0
  137. fusion_bench_config/method/adamerging/clip.yaml +1 -1
  138. fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
  139. fusion_bench_config/method/depth_upscaling.yaml +4 -1
  140. fusion_bench_config/method/regmean/clip_regmean.yaml +1 -1
  141. fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +11 -0
  142. fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
  143. fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
  144. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -0
  145. fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
  146. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +73 -8
  147. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +27 -7
  148. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +34 -4
  149. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +14 -17
  150. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +14 -3
  151. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +39 -5
  152. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +49 -5
  153. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +55 -5
  154. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +21 -4
  155. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +61 -5
  156. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +67 -5
  157. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +73 -5
  158. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +26 -3
  159. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
  160. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +7 -5
  161. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +6 -10
  162. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +6 -7
  163. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +6 -7
  164. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +7 -8
  165. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +8 -6
  166. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +4 -6
  167. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +32 -7
  168. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +14 -6
  169. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +73 -8
  170. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +27 -7
  171. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +6 -10
  172. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +2 -2
  173. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
  174. fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
  175. fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
  176. fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
  177. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
  178. fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
  179. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
  180. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
  181. fusion_bench_config/nyuv2_config.yaml +3 -1
  182. fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
  183. fusion_bench_config/path/default.yaml +28 -0
  184. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
  185. fusion_bench_config/method/adamerging.yaml +0 -23
  186. fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
  187. fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
  188. fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
  189. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/WHEEL +0 -0
  190. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/entry_points.txt +0 -0
  191. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/licenses/LICENSE +0 -0
  192. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/top_level.txt +0 -0
  193. /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
fusion_bench_config/_get_started/clip_evaluate_single_model.yaml
@@ -0,0 +1,21 @@
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
+method:
+  _target_: fusion_bench.method.DummyAlgorithm
+modelpool:
+  _target_: fusion_bench.modelpool.CLIPVisionModelPool
+  models:
+    _pretrained_: openai/clip-vit-base-patch32
+taskpool:
+  _target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
+  test_datasets:
+    sun397:
+      _target_: datasets.load_dataset
+      path: tanganke/sun397
+      split: test
+    stanford-cars:
+      _target_: datasets.load_dataset
+      path: tanganke/stanford_cars
+      split: test
+  clip_model: openai/clip-vit-base-patch32
+  processor: openai/clip-vit-base-patch32
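The new `_get_started` configs above are self-contained Hydra trees. A minimal sketch of materializing one outside the `fusion_bench` CLI, using only stock OmegaConf/Hydra calls; the `run()` entry point is an assumption about the program interface (see `fusion_bench/programs/base_program.py` in the file list):

    # minimal sketch, assuming the instantiated program exposes run()
    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    cfg = OmegaConf.load(
        "fusion_bench_config/_get_started/clip_evaluate_single_model.yaml"
    )
    # _recursive_: false keeps method/modelpool/taskpool as DictConfig objects,
    # leaving their instantiation to the program itself
    program = instantiate(cfg)
    program.run()  # assumed entry point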
fusion_bench_config/_get_started/clip_simple_average.yaml
@@ -0,0 +1,23 @@
+_target_: fusion_bench.programs.FabricModelFusionProgram # (1)!
+_recursive_: false
+method: # (2)!
+  _target_: fusion_bench.method.SimpleAverageAlgorithm
+modelpool: # (3)!
+  _target_: fusion_bench.modelpool.CLIPVisionModelPool
+  models:
+    _pretrained_: openai/clip-vit-base-patch32
+    sun397: tanganke/clip-vit-base-patch32_sun397
+    stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+taskpool: # (4)!
+  _target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
+  test_datasets:
+    sun397:
+      _target_: datasets.load_dataset
+      path: tanganke/sun397
+      split: test
+    stanford-cars:
+      _target_: datasets.load_dataset
+      path: tanganke/stanford_cars
+      split: test
+  clip_model: openai/clip-vit-base-patch32
+  processor: openai/clip-vit-base-patch32
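For orientation, `SimpleAverageAlgorithm` merges by uniform weight averaging. A sketch of the computation this config selects (hypothetical helper, not the package's implementation):

    # element-wise mean over the task models' state dicts
    from typing import Dict, List
    import torch

    def simple_average(
        state_dicts: List[Dict[str, torch.Tensor]]
    ) -> Dict[str, torch.Tensor]:
        merged = {}
        for key in state_dicts[0]:
            merged[key] = torch.stack(
                [sd[key].float() for sd in state_dicts]
            ).mean(dim=0)
        return merged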
fusion_bench_config/_get_started/clip_task_arithmetic.yaml
@@ -0,0 +1,24 @@
+_target_: fusion_bench.programs.FabricModelFusionProgram
+_recursive_: false
+method:
+  _target_: fusion_bench.method.TaskArithmeticAlgorithm
+  scaling_factor: 0.7
+modelpool:
+  _target_: fusion_bench.modelpool.CLIPVisionModelPool
+  models:
+    _pretrained_: openai/clip-vit-base-patch32
+    sun397: tanganke/clip-vit-base-patch32_sun397
+    stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+taskpool:
+  _target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
+  test_datasets:
+    sun397:
+      _target_: datasets.load_dataset
+      path: tanganke/sun397
+      split: test
+    stanford-cars:
+      _target_: datasets.load_dataset
+      path: tanganke/stanford_cars
+      split: test
+  clip_model: openai/clip-vit-base-patch32
+  processor: openai/clip-vit-base-patch32
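Here `scaling_factor: 0.7` is the task-arithmetic coefficient lambda in theta_merged = theta_pre + lambda * sum_i (theta_i - theta_pre). A sketch with hypothetical helper names, not the package's code:

    from typing import Dict, List
    import torch

    def task_arithmetic(
        pretrained: Dict[str, torch.Tensor],
        finetuned: List[Dict[str, torch.Tensor]],
        scaling_factor: float = 0.7,
    ) -> Dict[str, torch.Tensor]:
        merged = {}
        for key, base in pretrained.items():
            # sum of task vectors (finetuned minus pretrained)
            task_vector = sum(ft[key] - base for ft in finetuned)
            merged[key] = base + scaling_factor * task_vector
        return merged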
fusion_bench_config/_get_started/greeting_program.yaml
@@ -0,0 +1,4 @@
+_target_: fusion_bench._get_started.greeting_program.GreetingProgram
+message: "Welcome to FusionBench"
+name: "Developer"
+repeat_count: 3
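The actual program ships in `fusion_bench/_get_started/greeting_program.py` (added above, +49 lines); the following is only a guess at its shape, inferred from the three config fields:

    # hypothetical reconstruction from the config fields, not the shipped code
    class GreetingProgram:
        def __init__(self, message: str, name: str, repeat_count: int = 1):
            self.message = message
            self.name = name
            self.repeat_count = repeat_count

        def run(self):  # assumed FusionBench program entry point
            for _ in range(self.repeat_count):
                print(f"{self.message}, {self.name}!")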
fusion_bench_config/fabric/loggers/csv_logger.yaml
@@ -3,9 +3,9 @@ _target_: lightning.fabric.loggers.CSVLogger
 # for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default
 
 # root directory for all logging
-root_dir: outputs/logs
+root_dir: ${path.log_dir}
 # the name of the experiment
-name: lightning_logs
-version: null
+name: ""
+version: ""
 prefix: ""
 flush_logs_every_n_steps: 100
fusion_bench_config/fabric/loggers/tensorboard_logger.yaml
@@ -3,9 +3,9 @@ _target_: lightning.fabric.loggers.TensorBoardLogger
 # for example, `outputs/logs/lightning_logs/version_0` and `outputs/logs/lightning_logs/version_1` by default
 
 # root directory for all logging
-root_dir: outputs/logs
+root_dir: ${path.log_dir}
 # the name of the experiment
-name: "lightning_logs"
-version: null
+name: ""
+version: ""
 sub_dir: null
 default_hp_metric: false
fusion_bench_config/fabric_model_fusion.yaml
@@ -1,19 +1,47 @@
+# =============================================================================
+# FusionBench Fabric Model Fusion Configuration
+# =============================================================================
+# This configuration file defines the settings for running model fusion experiments
+# using PyTorch Lightning Fabric framework within FusionBench.
+#
+# The configuration includes:
+#
+# - Hydra framework settings and overrides
+# - PyTorch Lightning Fabric configuration for distributed training
+# - Path management for data, outputs, and logs
+# - (core components) Model pool, fusion method, and task pool specifications
+# - Experiment execution parameters and debugging options
+#
+# =============================================================================
+# Hydra Configuration Defaults
+# =============================================================================
 defaults:
-  - hydra: default
-  - fabric: auto
-  # --- Model, Method, Task ---
-  - modelpool: CLIPVisionModelPool/clip-vit-base-patch32_TA8
-  - method: dummy
-  - taskpool: dummy
-  - _self_
+  - hydra: default # Hydra framework configuration
+  - fabric: auto # PyTorch Lightning Fabric auto-configuration
+  - path: default # Path management configuration
+  # --- Core Components ---
+  - modelpool: CLIPVisionModelPool/clip-vit-base-patch32_TA8 # Model pool specification
+  - method: dummy # Fusion method (placeholder)
+  - taskpool: dummy # Task pool specification (placeholder)
+  - _self_ # Self-reference for override priority
+# =============================================================================
+# Program Configuration
+# =============================================================================
 _target_: fusion_bench.programs.FabricModelFusionProgram
-_recursive_: false
-fast_dev_run: false # Run a single batch of data to test the model or method
-# Run the script without actually running the experiment, use with `print_config=true`.
-# You can also use `--cfg` or `-c` to show the configuration instead of running.
-dry_run: false
-print_config: true # Print the configuration to the console
-merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
-merged_model_save_kwargs: null
-report_save_path: "{log_dir}/program_report.json" # path to save the result report
-print_function_call: true # set to false if you don't want to print the details of instantiate calls
+_recursive_: false # Disable recursive instantiation
+# =============================================================================
+# Experiment Execution Settings
+# =============================================================================
+# Development and debugging options
+fast_dev_run: false # This option is for quick testing. For example, run single batch instead of full dataset
+dry_run: false # Show configuration without running experiment
+print_config: true # Display full configuration before execution
+print_function_call: true # Show detailed instantiation calls
+# =============================================================================
+# Output and Logging Configuration
+# =============================================================================
+# Model saving configuration
+merged_model_save_path: null # Path to save merged model.
+merged_model_save_kwargs: null # Additional kwargs for model saving.
+# Report generation
+report_save_path: "{log_dir}/program_report.json" # Experiment results report path
fusion_bench_config/hydra/default.yaml
@@ -2,7 +2,11 @@ defaults:
   - override help: fusion_bench_help
   - override job_logging: rich_logging
 run:
-  dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}
+  dir: ${path.log_dir}
 sweep:
-  dir: outputs/${hydra.job.name}/${now:%Y-%m-%d_%H-%M-%S}
+  dir: ${path.log_dir}
   subdir: ${hydra.job.num}
+job:
+  env_set:
+    HYDRA_FULL_ERROR: ${oc.env:HYDRA_FULL_ERROR,1}
+output_subdir: ""
fusion_bench_config/llama_full_finetune.yaml
@@ -1,6 +1,7 @@
 defaults:
   - hydra: default
   - fabric: llama_fsdp
+  - path: default
   # --- Model, Method, Task ---
   - method: lm_finetune/fullfinetune_sft.yaml
   - modelpool: CausalLMPool/llama_alpaca_cleaned.yaml
fusion_bench_config/method/adamerging/clip.yaml
@@ -1,5 +1,5 @@
 # this option can be "clip_task_wise_adamerging"
-name: ???
+name: clip_layer_wise_adamerging
 # this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
 # if weights is specified, skip the test-time adaptation training
 weights: null
fusion_bench_config/method/bitdelta/bitdelta.yaml
@@ -0,0 +1,12 @@
+_target_: fusion_bench.method.bitdelta.BitDeltaAlgorithm
+save_dir: null
+save_full_model: false
+# training arguments
+lr: 1e-4
+batch_size: 4
+num_steps: 100
+# dataset arguments
+dataset_name: c4
+subset: en
+split: train
+max_length: 128
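BitDelta (Liu et al., 2024) compresses each fine-tuning delta to one sign bit per weight plus a per-matrix scale; the lr/num_steps/c4 options above drive a subsequent calibration pass over the scales. A sketch of the binarization step (hypothetical helper name):

    import torch

    def binarize_delta(w_base: torch.Tensor, w_ft: torch.Tensor):
        delta = w_ft - w_base
        scale = delta.abs().mean()     # per-matrix scale, initialized as in the paper
        sign = torch.sign(delta)       # 1 bit per element
        w_hat = w_base + scale * sign  # reconstructed fine-tuned weight
        return sign, scale, w_hat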
fusion_bench_config/method/depth_upscaling.yaml
@@ -1,5 +1,8 @@
 _target_: DepthUpscalingAlgorithm
-# this should be a list of integers or string, indicating the sequence of layers. If the entry is an integer, it will use the n-th layer of the model. If the entry is a string, it will use the layers specified by the string. The string should be a valid python expression that evaluates to a list of integers.
+# this should be a list of integers or string, indicating the sequence of layers.
+# If the entry is an integer, it will use the n-th layer of the model.
+# If the entry is a string, it will use the layers specified by the string.
+# The string should be a valid python expression that evaluates to a list of integers.
 # for example, ["range(0,12)", "range(6,12)"] will use the first 12 layers and the last 6 layers of the model to construct the new model
 # [0, 2, 4, "range(6,12)"] will use the 1st, 3rd, 5th, and the 7th to 12th layers of the model to construct the new model
 layer_indices: null
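A sketch of how a mixed `layer_indices` list such as `[0, 2, 4, "range(6,12)"]` can be expanded under the semantics described in the comments above (hypothetical helper, not the package's parser):

    from typing import List, Union

    def expand_layer_indices(layer_indices: List[Union[int, str]]) -> List[int]:
        expanded: List[int] = []
        for entry in layer_indices:
            if isinstance(entry, int):
                expanded.append(entry)
            else:
                # e.g. "range(6,12)" evaluates to layers 6..11
                expanded.extend(list(eval(entry)))
        return expanded

    assert expand_layer_indices([0, 2, 4, "range(6,12)"]) == [0, 2, 4, 6, 7, 8, 9, 10, 11]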
fusion_bench_config/method/regmean/clip_regmean.yaml
@@ -5,7 +5,7 @@ exclude_param_names_regex: []
 num_regmean_examples: 256
 weight_transpose: true
 # float, reduce non-diagonal elements in regmean weights by multiplying this scalar
-reduce_non_diagonal_ratio: 0.6
+reduce_non_diagonal_ratio: 0.95
 dataloader_kwargs:
   batch_size: 32
   num_workers: 0
fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml
@@ -0,0 +1,11 @@
+_target_: fusion_bench.method.RegMeanAlgorithmForCLIPPlusPlus
+# list, regular expression of names of parameters that need to be excluded
+exclude_param_names_regex: []
+# numbers of examples to compute regmean weights
+num_regmean_examples: 256
+weight_transpose: true
+# float, reduce non-diagonal elements in regmean weights by multiplying this scalar
+reduce_non_diagonal_ratio: 0.95
+dataloader_kwargs:
+  batch_size: 32
+  num_workers: 0
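Both RegMean configs parameterize the closed-form merge of Jin et al.: per linear layer, W* = (sum_i G_i)^-1 (sum_i G_i W_i) with Gram matrices G_i = X_i^T X_i, whose off-diagonal entries are damped by `reduce_non_diagonal_ratio` (now 0.95). A sketch with hypothetical helper names:

    from typing import List
    import torch

    def regmean_merge(
        grams: List[torch.Tensor],     # G_i = X_i^T X_i per task
        weights: List[torch.Tensor],   # W_i, shape (in_features, out_features)
        reduce_non_diagonal_ratio: float = 0.95,
    ) -> torch.Tensor:
        def damp(g: torch.Tensor) -> torch.Tensor:
            # scale off-diagonal entries, keep the diagonal intact
            diag = torch.diag(torch.diagonal(g))
            return reduce_non_diagonal_ratio * (g - diag) + diag

        lhs = sum(damp(g) for g in grams)
        rhs = sum(damp(g) @ w for g, w in zip(grams, weights))
        return torch.linalg.solve(lhs, rhs)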
fusion_bench_config/method/smile_upscaling/error_accumulation.yaml
@@ -0,0 +1,5 @@
+# Measure error accumulation
+_target_: fusion_bench.method.smile_upscaling.error_accumulation.ErrorAccumulationAnalysisForCLIP
+gate_k: 16
+k: 128
+top_k: 1
fusion_bench_config/method/smile_upscaling/projected_energy.yaml
@@ -0,0 +1,2 @@
+# Measure projected energy
+_target_: fusion_bench.method.smile_upscaling.projected_energy.ProjectedEnergyAnalysis
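The config exposes no options beyond its target, so the measured quantity is not pinned down here; one plausible reading of the name (a guess, see the `projected_energy.py` module added above for the exact definition) is the fraction of a fine-tuning delta's squared Frobenius norm that falls in the top-k singular subspace of the pretrained weight:

    import torch

    def projected_energy(w_pre: torch.Tensor, delta: torch.Tensor, k: int) -> float:
        u, _, _ = torch.linalg.svd(w_pre, full_matrices=False)
        proj = u[:, :k] @ (u[:, :k].T @ delta)  # project delta onto span(U_k)
        return (proj.norm() ** 2 / delta.norm() ** 2).item()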
fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml
@@ -11,3 +11,4 @@ num_experts_per_tok: 1
 rank_of_router: 8
 # if rank_of_expert < 0, dense expert is used.
 rank_of_expert: 64
+save_with_remote_code: true
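`save_with_remote_code: true` suggests the upscaled model is saved together with its custom modeling code (see the new `modeling_smile_qwen2/register.py` above) so it can be reloaded with `trust_remote_code=True`. A generic sketch of the standard transformers mechanism this most likely builds on, with toy stand-in classes:

    import torch.nn as nn
    from transformers import PretrainedConfig, PreTrainedModel

    class ToyConfig(PretrainedConfig):   # stand-in for the SMILE Qwen2 config
        model_type = "toy_smile"

    class ToyModel(PreTrainedModel):     # stand-in for the SMILE Qwen2 model
        config_class = ToyConfig

        def __init__(self, config):
            super().__init__(config)
            self.proj = nn.Linear(8, 8)

        def forward(self, x):
            return self.proj(x)

    # after registration, save_pretrained() copies the defining .py files into
    # the checkpoint, so AutoModel.from_pretrained(..., trust_remote_code=True)
    # can rebuild the custom classes
    ToyConfig.register_for_auto_class()
    ToyModel.register_for_auto_class("AutoModel")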
fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml
@@ -2,11 +2,8 @@ _usage_: |
   defaults:
     - CLIPVisionModelPool@: _template
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
-_version_: "0.2"
 _recursive_: False
 models: ???
 train_datasets: null
 test_datasets: null
-processor:
-  _target_: transformers.CLIPProcessor.from_pretrained
-  pretrained_model_name_or_path: openai/clip-vit-base-patch32
+processor: openai/clip-vit-base-patch32
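The template now accepts a bare model id for `processor` instead of an explicit `_target_` block. A plausible resolution rule (an assumption; the pool's actual handling lives in `fusion_bench/modelpool/clip_vision/modelpool.py` above):

    from typing import Union
    from hydra.utils import instantiate
    from omegaconf import DictConfig
    from transformers import CLIPProcessor

    def load_processor(spec: Union[str, DictConfig]) -> CLIPProcessor:
        if isinstance(spec, str):
            # new shorthand: a plain string is a pretrained model id
            return CLIPProcessor.from_pretrained(spec)
        # old explicit form: instantiate the _target_ config as before
        return instantiate(spec)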
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml
@@ -1,11 +1,76 @@
-# The 20 task used in the paper:
+# The 20 task used in the paper:
 # Wang et al. Localizing Task Information for Improved Model Merging and Compression
 # http://arxiv.org/abs/2405.07813
 defaults:
-  - CLIPVisionModelPool@: _template
-  - /model/clip-vit@models: clip-vit-base-patch16_TALL20
-  - /dataset/image_classification/train@train_datasets: TALL20
-  - /dataset/image_classification/test@test_datasets: TALL20
-processor:
-  _target_: transformers.CLIPProcessor.from_pretrained
-  pretrained_model_name_or_path: openai/clip-vit-base-patch16
+  - /dataset/image_classification/train@train_datasets:
+      # -- begin of eight tasks in the task arithmetic paper ---
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+      # -- end of eight tasks in the task arithmetic paper ---
+      - oxford_flowers102
+      - pcam
+      - fer2013
+      - oxford-iiit-pet
+      - stl10
+      - cifar100
+      - cifar10
+      - food101
+      - fashion_mnist
+      - emnist_letters
+      - kmnist
+      - rendered-sst2
+  - /dataset/image_classification/test@test_datasets:
+      # -- begin of eight tasks in the task arithmetic paper ---
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+      # -- end of eight tasks in the task arithmetic paper ---
+      - oxford_flowers102
+      - pcam
+      - fer2013
+      - oxford-iiit-pet
+      - stl10
+      - cifar100
+      - cifar10
+      - food101
+      - fashion_mnist
+      - emnist_letters
+      - kmnist
+      - rendered-sst2
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-base-patch16
+models:
+  _pretrained_: openai/clip-vit-base-patch16
+  sun397: tanganke/clip-vit-base-patch16_sun397
+  stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch16_resisc45
+  eurosat: tanganke/clip-vit-base-patch16_eurosat
+  svhn: tanganke/clip-vit-base-patch16_svhn
+  gtsrb: tanganke/clip-vit-base-patch16_gtsrb
+  mnist: tanganke/clip-vit-base-patch16_mnist
+  dtd: tanganke/clip-vit-base-patch16_dtd
+  oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102
+  pcam: tanganke/clip-vit-base-patch16_pcam
+  fer2013: tanganke/clip-vit-base-patch16_fer2013
+  oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet
+  stl10: tanganke/clip-vit-base-patch16_stl10
+  cifar100: tanganke/clip-vit-base-patch16_cifar100
+  cifar10: tanganke/clip-vit-base-patch16_cifar10
+  food101: tanganke/clip-vit-base-patch16_food101
+  fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist
+  emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters
+  kmnist: tanganke/clip-vit-base-patch16_kmnist
+  rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2
+platform: hf
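With `platform: hf`, the `models` entries are plain Hugging Face Hub ids, each pointing at a fine-tuned CLIP vision encoder. Loading one directly (requires network access to the Hub):

    from transformers import CLIPVisionModel

    vision_model = CLIPVisionModel.from_pretrained(
        "tanganke/clip-vit-base-patch16_sun397"  # id taken from the config above
    )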
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml
@@ -1,9 +1,29 @@
-# The 20 task used in the paper:
+# The 20 task used in the paper:
 # Wang et al. Localizing Task Information for Improved Model Merging and Compression
 # http://arxiv.org/abs/2405.07813
-defaults:
-  - CLIPVisionModelPool@: _template
-  - /model/clip-vit@models: clip-vit-base-patch16_TALL20
-processor:
-  _target_: transformers.CLIPProcessor.from_pretrained
-  pretrained_model_name_or_path: openai/clip-vit-base-patch16
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-base-patch16
+models:
+  _pretrained_: openai/clip-vit-base-patch16
+  sun397: tanganke/clip-vit-base-patch16_sun397
+  stanford-cars: tanganke/clip-vit-base-patch16_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch16_resisc45
+  eurosat: tanganke/clip-vit-base-patch16_eurosat
+  svhn: tanganke/clip-vit-base-patch16_svhn
+  gtsrb: tanganke/clip-vit-base-patch16_gtsrb
+  mnist: tanganke/clip-vit-base-patch16_mnist
+  dtd: tanganke/clip-vit-base-patch16_dtd
+  oxford_flowers102: tanganke/clip-vit-base-patch16_oxford_flowers102
+  pcam: tanganke/clip-vit-base-patch16_pcam
+  fer2013: tanganke/clip-vit-base-patch16_fer2013
+  oxford-iiit-pet: tanganke/clip-vit-base-patch16_oxford-iiit-pet
+  stl10: tanganke/clip-vit-base-patch16_stl10
+  cifar100: tanganke/clip-vit-base-patch16_cifar100
+  cifar10: tanganke/clip-vit-base-patch16_cifar10
+  food101: tanganke/clip-vit-base-patch16_food101
+  fashion_mnist: tanganke/clip-vit-base-patch16_fashion_mnist
+  emnist_letters: tanganke/clip-vit-base-patch16_emnist_letters
+  kmnist: tanganke/clip-vit-base-patch16_kmnist
+  rendered-sst2: tanganke/clip-vit-base-patch16_rendered-sst2
+platform: hf
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml
@@ -1,5 +1,35 @@
+# eight image classification tasks defined in task arithmetic paper
 defaults:
-  - CLIPVisionModelPool@: _template
-  - /model/clip-vit@models: clip-vit-base-patch32_eight_tasks
-  - /dataset/image_classification/train@train_datasets: the_eight_tasks
-  - /dataset/image_classification/test@test_datasets: the_eight_tasks
+  - /dataset/image_classification/train@train_datasets:
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+  - /dataset/image_classification/test@test_datasets:
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+  - _self_
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-base-patch32
+models:
+  _pretrained_: openai/clip-vit-base-patch32
+  sun397: tanganke/clip-vit-base-patch32_sun397
+  stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch32_resisc45
+  eurosat: tanganke/clip-vit-base-patch32_eurosat
+  svhn: tanganke/clip-vit-base-patch32_svhn
+  gtsrb: tanganke/clip-vit-base-patch32_gtsrb
+  mnist: tanganke/clip-vit-base-patch32_mnist
+  dtd: tanganke/clip-vit-base-patch32_dtd
+platform: hf
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml
@@ -1,21 +1,18 @@
 defaults:
-  - _self_
-  - /model/clip-vit@models:
-      - clip-vit-base-patch32
-      - clip-vit-base-patch32_sun397
-      - clip-vit-base-patch32_stanford-cars
-      - clip-vit-base-patch32_resisc45
-      - clip-vit-base-patch32_eurosat
-      - clip-vit-base-patch32_svhn
-      - clip-vit-base-patch32_gtsrb
-      - clip-vit-base-patch32_mnist
-      - clip-vit-base-patch32_dtd
   - /dataset/image_classification/train@train_datasets:
       - tiny-imagenet
+  - _self_
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
-_recursive_: false
-models: ???
-train_datasets: ???
-processor:
-  _target_: transformers.CLIPProcessor.from_pretrained
-  pretrained_model_name_or_path: openai/clip-vit-base-patch32
+_recursive_: False
+processor: openai/clip-vit-base-patch32
+models:
+  _pretrained_: openai/clip-vit-base-patch32
+  sun397: tanganke/clip-vit-base-patch32_sun397
+  stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch32_resisc45
+  eurosat: tanganke/clip-vit-base-patch32_eurosat
+  svhn: tanganke/clip-vit-base-patch32_svhn
+  gtsrb: tanganke/clip-vit-base-patch32_gtsrb
+  mnist: tanganke/clip-vit-base-patch32_mnist
+  dtd: tanganke/clip-vit-base-patch32_dtd
+platform: hf
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml
@@ -1,3 +1,14 @@
-defaults:
-  - CLIPVisionModelPool@: _template
-  - /model/clip-vit@models: clip-vit-base-patch32_eight_tasks
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-base-patch32
+models:
+  _pretrained_: openai/clip-vit-base-patch32
+  sun397: tanganke/clip-vit-base-patch32_sun397
+  stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch32_resisc45
+  eurosat: tanganke/clip-vit-base-patch32_eurosat
+  svhn: tanganke/clip-vit-base-patch32_svhn
+  gtsrb: tanganke/clip-vit-base-patch32_gtsrb
+  mnist: tanganke/clip-vit-base-patch32_mnist
+  dtd: tanganke/clip-vit-base-patch32_dtd
+platform: hf
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml
@@ -1,8 +1,42 @@
-# The 20 task used in the paper:
+# The 10 task used in the paper (TALL mask):
 # Wang et al. Localizing Task Information for Improved Model Merging and Compression
 # http://arxiv.org/abs/2405.07813
 defaults:
-  - CLIPVisionModelPool@: _template
-  - /model/clip-vit@models: clip-vit-base-patch32_TALL10
-  - /dataset/image_classification/train@train_datasets: TALL10
-  - /dataset/image_classification/test@test_datasets: TALL10
+  - /dataset/image_classification/train@train_datasets:
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+      - oxford_flowers102
+      - pcam
+  - /dataset/image_classification/test@test_datasets:
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+      - oxford_flowers102
+      - pcam
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-base-patch32
+models:
+  _pretrained_: openai/clip-vit-base-patch32
+  sun397: tanganke/clip-vit-base-patch32_sun397
+  stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch32_resisc45
+  eurosat: tanganke/clip-vit-base-patch32_eurosat
+  svhn: tanganke/clip-vit-base-patch32_svhn
+  gtsrb: tanganke/clip-vit-base-patch32_gtsrb
+  mnist: tanganke/clip-vit-base-patch32_mnist
+  dtd: tanganke/clip-vit-base-patch32_dtd
+  oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102
+  pcam: tanganke/clip-vit-base-patch32_pcam
+platform: hf
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml
@@ -1,8 +1,52 @@
-# The 20 task used in the paper:
+# The 12 task used in the paper (TALL mask):
 # Wang et al. Localizing Task Information for Improved Model Merging and Compression
 # http://arxiv.org/abs/2405.07813
 defaults:
-  - CLIPVisionModelPool@: _template
-  - /model/clip-vit@models: clip-vit-base-patch32_TALL12
-  - /dataset/image_classification/train@train_datasets: TALL12
-  - /dataset/image_classification/test@test_datasets: TALL12
+  - /dataset/image_classification/train@train_datasets:
+      # -- begin of eight tasks in the task arithmetic paper ---
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+      # -- end of eight tasks in the task arithmetic paper ---
+      - oxford_flowers102
+      - pcam
+      - fer2013
+      - oxford-iiit-pet
+  - /dataset/image_classification/test@test_datasets:
+      # -- begin of eight tasks in the task arithmetic paper ---
+      - sun397
+      - stanford-cars
+      - resisc45
+      - eurosat
+      - svhn
+      - gtsrb
+      - mnist
+      - dtd
+      # -- end of eight tasks in the task arithmetic paper ---
+      - oxford_flowers102
+      - pcam
+      - fer2013
+      - oxford-iiit-pet
+_target_: fusion_bench.modelpool.CLIPVisionModelPool
+_recursive_: False
+processor: openai/clip-vit-base-patch32
+models:
+  _pretrained_: openai/clip-vit-base-patch32
+  sun397: tanganke/clip-vit-base-patch32_sun397
+  stanford-cars: tanganke/clip-vit-base-patch32_stanford-cars
+  resisc45: tanganke/clip-vit-base-patch32_resisc45
+  eurosat: tanganke/clip-vit-base-patch32_eurosat
+  svhn: tanganke/clip-vit-base-patch32_svhn
+  gtsrb: tanganke/clip-vit-base-patch32_gtsrb
+  mnist: tanganke/clip-vit-base-patch32_mnist
+  dtd: tanganke/clip-vit-base-patch32_dtd
+  oxford_flowers102: tanganke/clip-vit-base-patch32_oxford_flowers102
+  pcam: tanganke/clip-vit-base-patch32_pcam
+  fer2013: tanganke/clip-vit-base-patch32_fer2013
+  oxford-iiit-pet: tanganke/clip-vit-base-patch32_oxford-iiit-pet
+platform: hf