fusion-bench 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the registry.
Files changed (104)
  1. fusion_bench/method/__init__.py +4 -0
  2. fusion_bench/method/adamerging/flan_t5_layer_wise_adamerging.py +1 -1
  3. fusion_bench/method/adamerging/gpt2_layer_wise_adamerging.py +1 -1
  4. fusion_bench/method/base_algorithm.py +1 -0
  5. fusion_bench/method/dawe/dawe_for_clip.py +1 -1
  6. fusion_bench/method/depth_upscaling/depth_upscaling_for_llama.py +3 -2
  7. fusion_bench/method/fw_merging/__init__.py +2 -0
  8. fusion_bench/method/fw_merging/fw_hard.py +448 -0
  9. fusion_bench/method/fw_merging/fw_soft.py +519 -0
  10. fusion_bench/method/fw_merging/utils.py +331 -0
  11. fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +1 -1
  12. fusion_bench/method/moe_pruner/__init__.py +7 -0
  13. fusion_bench/method/moe_pruner/hooks/__init__.py +6 -0
  14. fusion_bench/method/moe_pruner/hooks/deepseek_v2.py +85 -0
  15. fusion_bench/method/moe_pruner/hooks/hook.py +23 -0
  16. fusion_bench/method/moe_pruner/hooks/mixtral.py +93 -0
  17. fusion_bench/method/moe_pruner/moe_pruner.py +304 -0
  18. fusion_bench/method/moe_pruner/utils/__init__.py +1 -0
  19. fusion_bench/method/moe_pruner/utils/data.py +154 -0
  20. fusion_bench/method/moe_pruner/utils/layerwrapper.py +61 -0
  21. fusion_bench/method/moe_pruner/utils/prune.py +313 -0
  22. fusion_bench/method/moe_pruner/utils/score.py +41 -0
  23. fusion_bench/method/pruning/__init__.py +1 -0
  24. fusion_bench/method/pruning/llama_sparsegpt_prune.py +223 -0
  25. fusion_bench/method/pruning/sparsegpt_utils/__init__.py +1 -0
  26. fusion_bench/method/pruning/sparsegpt_utils/sparsegpt.py +128 -0
  27. fusion_bench/method/pruning/wanda_utils/data.py +33 -14
  28. fusion_bench/method/pwe_moe/module.py +2 -7
  29. fusion_bench/method/randes/__init__.py +15 -0
  30. fusion_bench/method/randes/base_algorithm.py +1013 -0
  31. fusion_bench/method/randes/modelsoup.py +126 -0
  32. fusion_bench/method/randes/task_arithmetic.py +318 -0
  33. fusion_bench/method/simple_average.py +3 -2
  34. fusion_bench/method/sparselo/sparselo.py +20 -2
  35. fusion_bench/method/tall_mask/__init__.py +1 -0
  36. fusion_bench/method/tall_mask/task_arithmetic.py +133 -0
  37. fusion_bench/method/task_singular_vector/TSVM.py +238 -25
  38. fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +52 -20
  39. fusion_bench/mixins/hydra_config.py +1 -1
  40. fusion_bench/mixins/lightning_fabric.py +25 -1
  41. fusion_bench/mixins/serialization.py +18 -2
  42. fusion_bench/modelpool/base_pool.py +1 -0
  43. fusion_bench/modelpool/clip_vision/modelpool.py +21 -13
  44. fusion_bench/modelpool/lazy_state_dict_pool.py +15 -0
  45. fusion_bench/models/modeling_deepseek_v2/__init__.py +15 -0
  46. fusion_bench/models/modeling_deepseek_v2/configuration_deepseek.py +208 -0
  47. fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +1922 -0
  48. fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +38 -0
  49. fusion_bench/models/parameter_dict.py +6 -1
  50. fusion_bench/programs/fabric_fusion_program.py +14 -5
  51. fusion_bench/taskpool/base_pool.py +1 -0
  52. fusion_bench/taskpool/clip_vision/taskpool.py +8 -1
  53. fusion_bench/taskpool/dummy.py +6 -4
  54. fusion_bench/utils/__init__.py +2 -1
  55. fusion_bench/utils/data.py +1 -1
  56. fusion_bench/utils/{instantiate.py → instantiate_utils.py} +3 -0
  57. fusion_bench/utils/lazy_state_dict.py +268 -0
  58. fusion_bench/utils/parameters.py +33 -0
  59. fusion_bench/utils/pylogger.py +28 -0
  60. fusion_bench/utils/state_dict_arithmetic.py +74 -2
  61. fusion_bench/utils/type.py +1 -0
  62. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/METADATA +8 -2
  63. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/RECORD +104 -44
  64. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/WHEEL +1 -1
  65. fusion_bench_config/dataset/image_classification/test/TALL10.yaml +28 -0
  66. fusion_bench_config/dataset/image_classification/test/TALL12.yaml +28 -0
  67. fusion_bench_config/dataset/image_classification/test/TALL16.yaml +28 -0
  68. fusion_bench_config/dataset/image_classification/test/TALL18.yaml +28 -0
  69. fusion_bench_config/dataset/image_classification/train/TALL10.yaml +28 -0
  70. fusion_bench_config/dataset/image_classification/train/TALL12.yaml +28 -0
  71. fusion_bench_config/dataset/image_classification/train/TALL16.yaml +28 -0
  72. fusion_bench_config/dataset/image_classification/train/TALL18.yaml +28 -0
  73. fusion_bench_config/fabric_model_fusion.yaml +2 -2
  74. fusion_bench_config/method/fw_merging/fw_hard.yaml +11 -0
  75. fusion_bench_config/method/fw_merging/fw_soft.yaml +12 -0
  76. fusion_bench_config/method/moe_pruner/moe_pruner.yaml +15 -0
  77. fusion_bench_config/method/pruning/llama_sparsegpt_pruning.yaml +16 -0
  78. fusion_bench_config/method/randes/superposed_model_soup.yaml +18 -0
  79. fusion_bench_config/method/randes/superposed_task_arithmetic.yaml +20 -0
  80. fusion_bench_config/method/randes/superposed_task_arithmetic_lora.yaml +20 -0
  81. fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +2 -1
  82. fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
  83. fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
  84. fusion_bench_config/method/tall_mask/task_arithmetic.yaml +4 -0
  85. fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +2 -1
  86. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL10.yaml +29 -0
  87. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL12.yaml +29 -0
  88. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL16.yaml +29 -0
  89. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL18.yaml +29 -0
  90. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +8 -0
  91. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +8 -0
  92. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +8 -0
  93. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +8 -0
  94. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_cars_and_dtd.yaml +16 -0
  95. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +16 -0
  96. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +16 -0
  97. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +19 -0
  98. fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml +15 -0
  99. fusion_bench_config/modelpool/CausalLMPool/mixtral-8x7b.yaml +14 -0
  100. fusion_bench_config/modelpool/SeqenceClassificationModelPool/roberta-base_glue.yaml +69 -0
  101. fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml +0 -1
  102. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/entry_points.txt +0 -0
  103. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/licenses/LICENSE +0 -0
  104. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,28 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ # eight tasks in the task arithmetic paper
+ - sun397
+ - stanford-cars
+ - resisc45
+ - eurosat
+ - svhn
+ - gtsrb
+ - mnist
+ - dtd
+ # additional 6 tasks in the TALL mask paper (TALL 14)
+ - oxford_flowers102
+ - pcam
+ - fer2013
+ - oxford-iiit-pet
+ - stl10
+ - cifar100
+ # additional 6 tasks in the TALL mask paper (TALL 20)
+ - cifar10
+ - food101
+ # - fashion_mnist
+ # - emnist_letters
+ # - kmnist
+ # - rendered-sst2
+
@@ -0,0 +1,28 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ # eight tasks in the task arithmetic paper
+ - sun397
+ - stanford-cars
+ - resisc45
+ - eurosat
+ - svhn
+ - gtsrb
+ - mnist
+ - dtd
+ # additional 6 tasks in the TALL mask paper (TALL 14)
+ - oxford_flowers102
+ - pcam
+ - fer2013
+ - oxford-iiit-pet
+ - stl10
+ - cifar100
+ # additional 6 tasks in the TALL mask paper (TALL 20)
+ - cifar10
+ - food101
+ - fashion_mnist
+ - emnist_letters
+ # - kmnist
+ # - rendered-sst2
+
@@ -9,11 +9,11 @@ defaults:
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
  fast_dev_run: false # Run a single batch of data to test the model or method
- # Run the script without actually running the experiment, use with `print_config=true`.
+ # Run the script without actually running the experiment, use with `print_config=true`.
  # You can also use `--cfg` or `-c` to show the configuration instead of running.
  dry_run: false
  print_config: true # Print the configuration to the console
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
  merged_model_save_kwargs: null
- report_save_path: null # path to save the result report
+ report_save_path: "{log_dir}/program_report.json" # path to save the result report
  print_function_call: true # set to false if you don't want to print the details of instantiate calls
@@ -0,0 +1,11 @@
+ _target_: fusion_bench.method.FrankWolfeHardAlgorithm
+ merge_fn: task_arithmetic
+ max_iters: 10
+ step_size: 0.1
+ dataset_size: 100
+ tasks: []
+ init_weight:
+ loss_fn: cross_entropy
+ scaling_factor: 0.3
+ max_num_models: 100
+ granularity: task
@@ -0,0 +1,12 @@
+ _target_: fusion_bench.method.FrankWolfeSoftAlgorithm
+ init_weight:
+ max_iters: 10
+ merge_fn: 'adamerging'
+ tasks:
+ ada_iters: 500
+ dataset_size: 100
+ ada_coeff: 1e-8
+ step_size: 0.1
+ max_num_models: 100
+ granularity: task
+ ada_loss: entropy_loss
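
Both Frank-Wolfe merging configs above follow the standard Hydra `_target_` convention used throughout fusion_bench, so they can also be built programmatically. The sketch below is a hedged illustration of that pattern rather than an official example: the config path is simply where the file sits inside this wheel, and the keyword arguments passed to the class are exactly the keys shown in the YAML.

```python
# Minimal sketch, assuming hydra-core and fusion-bench are installed and the
# fusion_bench_config directory from this wheel is available at the path below.
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load("fusion_bench_config/method/fw_merging/fw_hard.yaml")
# `_target_` names fusion_bench.method.FrankWolfeHardAlgorithm, so instantiate()
# constructs that class with the remaining keys (merge_fn, max_iters, ...) as kwargs.
algorithm = instantiate(cfg)
print(type(algorithm).__name__)
```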
@@ -0,0 +1,15 @@
+ _target_: fusion_bench.method.moe_pruner.MoEPruner
+
+ nsamples: 100
+ seed: 42
+ device: cuda
+ max_seqlen: 2048
+ # `prune_type` can be either `unstructured` or `semistructured`
+ prune_type: unstructured
+ # === options for unstructured pruning ===
+ # `sparsity_ratio` is the ratio of weights to be pruned, 1 means all weights are pruned
+ sparsity_ratio: 0.5
+ # === options for semistructured pruning ===
+ # 2:4 means 2 out of 4 weights are pruned
+ n: 2
+ m: 4
@@ -0,0 +1,16 @@
+ _target_: fusion_bench.method.SparseGPTPruningForLlama
+ nsamples: 128
+ seed: 0
+ use_variant: false
+ # `prune_type` can be either `unstructured` or `semistructured`
+ prune_type: unstructured
+ # device and dtype to compute the pruning mask
+ device: cuda
+ dtype: null
+ # === options for unstructured pruning ===
+ # `sparsity_ratio` is the ratio of weights to be pruned, 1 means all weights are pruned
+ sparsity_ratio: 0.5
+ # === options for semistructured pruning ===
+ # 2:4 means 2 out of 4 weights are pruned
+ n: 2
+ m: 4
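
The `n` and `m` options in both pruning configs describe semi-structured (N:M) sparsity, and the comment "2:4 means 2 out of 4 weights are pruned" is meant literally: every group of four consecutive weights keeps only two entries. The snippet below illustrates that pattern with a simple magnitude criterion only; it is not the package's pruning code, which selects weights using calibration statistics rather than raw magnitude alone.

```python
# Illustration of an n:m = 2:4 sparsity pattern (magnitude-based, for clarity only).
import torch

def nm_prune(weight: torch.Tensor, n: int = 2, m: int = 4) -> torch.Tensor:
    """Zero the n smallest-magnitude weights in every contiguous group of m."""
    groups = weight.reshape(-1, m)
    # indices of the n smallest-magnitude entries per group
    drop = groups.abs().topk(n, dim=1, largest=False).indices
    mask = torch.ones_like(groups)
    mask.scatter_(1, drop, 0.0)
    return (groups * mask).reshape(weight.shape)

w = torch.randn(4, 8)
print(nm_prune(w))  # exactly two zeros in each group of four weights
```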
@@ -0,0 +1,18 @@
+ _target_: fusion_bench.method.randes.SuperposedModelSoupAlgorithm
+ #* === base randes options ===
+ mode: manual_absorption
+ # weights for all mlp and attn layers
+ target_layer:
+ - mlp_w
+ - attn_w
+ random_seed: 42 # for random_binary_diagonal_matrix
+ different_across_layers: True
+ joint_matrix_mode: flatten_hstack
+ rank: 1 # for columnwise svd
+ random_components: False
+ shift_layers: 0
+ absorber: None
+ debug: 0
+ ms_mode: average
+ verbose: 0 # level of verbosity
+ dropout_rate: 1 # take the target layer per n target layers
@@ -0,0 +1,20 @@
+ name: superposed_task_arithmetic
+ #* === base randes options ===
+ mode: random_binary_diagonal_matrix
+ # weights for all mlp and attn layers
+ target_layer:
+ - mlp_w
+ - attn_w
+ random_seed: 42 # for random_binary_diagonal_matrix
+ different_across_layers: True
+ joint_matrix_mode: flatten_hstack
+ rank: 1 # for columnwise svd
+ random_components: False
+ shift_layers: 0
+ debug: 0
+ verbose: 0
+ dropout_rate: 1
+ #* === task arithmetic options ===
+ scaling_factor: 0.5
+ # path to save/load the model
+ model_path: null
@@ -0,0 +1,20 @@
+ _target_: fusion_bench.method.randes.SuperposedTaskArithmeticLoRAAlgorithm
+ #* === base randes options ===
+ mode: random_binary_diagonal_matrix
+ # weights for all mlp and attn layers
+ target_layer:
+ - mlp_w
+ - attn_w
+ random_seed: 42 # for random_binary_diagonal_matrix
+ different_across_layers: True
+ joint_matrix_mode: flatten_hstack
+ rank: 1 # for columnwise svd
+ random_components: False
+ shift_layers: 0
+ debug: 0
+ verbose: 0
+ dropout_rate: 1
+ #* === task arithmetic options ===
+ scaling_factor: 0.5
+ # path to save/load the model
+ model_path: null
@@ -1,10 +1,11 @@
- _target_: fusion_bench.method.losparse.sparselo.IterativeSparseLoForLlama
+ _target_: fusion_bench.method.sparselo.sparselo.IterativeSparseLoForLlama
  _recursive_: false
  nsamples: 128
  seed: 0
  rank: 128
  num_iterations: 10
  variant: wanda
+ use_reference_model: false
  # `prune_type` can be either `unstructured` or `semistructured`
  prune_type: unstructured
  # device and dtype to compute the pruning mask
@@ -1,4 +1,4 @@
- _target_: fusion_bench.method.losparse.sparselo.PCPSparseLoForLlama
+ _target_: fusion_bench.method.sparselo.sparselo.PCPSparseLoForLlama
  _recursive_: false
  nsamples: 128
  seed: 0
@@ -1,4 +1,4 @@
- _target_: fusion_bench.method.losparse.sparselo.SparseLoForLlama
+ _target_: fusion_bench.method.sparselo.sparselo.SparseLoForLlama
  _recursive_: false
  nsamples: 128
  seed: 0
@@ -0,0 +1,4 @@
+ _target_: fusion_bench.method.tall_mask.TallMaskTaskArithmeticAlgorithm
+ tall_mask_lambda: 0.6
+ debug: 0
+ verbose: 0
@@ -1,7 +1,8 @@
  _target_: fusion_bench.method.TaskSingularVectorMerging
- remove_keys: null
+ exclude_keys: null
  # alpha is a float or a list of floats
  # example:
  # alpha: 1
  # alpha: [1, 0.5, 0.25]
  alpha: 1
+ return_single_task_models: false
@@ -0,0 +1,29 @@
+ # The 20 tasks used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ # pre-trained model
+ - clip-vit-base-patch32
+ # eight tasks in the task arithmetic paper
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_stanford-cars
+ - clip-vit-base-patch32_resisc45
+ - clip-vit-base-patch32_eurosat
+ - clip-vit-base-patch32_svhn
+ - clip-vit-base-patch32_gtsrb
+ - clip-vit-base-patch32_mnist
+ - clip-vit-base-patch32_dtd
+ # additional 6 tasks in the TALL mask paper (TALL 14)
+ - clip-vit-base-patch32_oxford_flowers102
+ - clip-vit-base-patch32_pcam
+ # - clip-vit-base-patch32_fer2013
+ # - clip-vit-base-patch32_oxford-iiit-pet
+ # - clip-vit-base-patch32_stl10
+ # - clip-vit-base-patch32_cifar100
+ # additional 6 tasks in the TALL mask paper (TALL 20)
+ # - clip-vit-base-patch32_cifar10
+ # - clip-vit-base-patch32_food101
+ # - clip-vit-base-patch32_fashion_mnist
+ # - clip-vit-base-patch32_emnist_letters
+ # - clip-vit-base-patch32_kmnist
+ # - clip-vit-base-patch32_rendered-sst2
@@ -0,0 +1,29 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ # pre-trained model
+ - clip-vit-base-patch32
+ # eight tasks in the task arithmetic paper
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_stanford-cars
+ - clip-vit-base-patch32_resisc45
+ - clip-vit-base-patch32_eurosat
+ - clip-vit-base-patch32_svhn
+ - clip-vit-base-patch32_gtsrb
+ - clip-vit-base-patch32_mnist
+ - clip-vit-base-patch32_dtd
+ # additional 6 tasks in the TALL mask paper (TALL 14)
+ - clip-vit-base-patch32_oxford_flowers102
+ - clip-vit-base-patch32_pcam
+ - clip-vit-base-patch32_fer2013
+ - clip-vit-base-patch32_oxford-iiit-pet
+ # - clip-vit-base-patch32_stl10
+ # - clip-vit-base-patch32_cifar100
+ # additional 6 tasks in the TALL mask paper (TALL 20)
+ # - clip-vit-base-patch32_cifar10
+ # - clip-vit-base-patch32_food101
+ # - clip-vit-base-patch32_fashion_mnist
+ # - clip-vit-base-patch32_emnist_letters
+ # - clip-vit-base-patch32_kmnist
+ # - clip-vit-base-patch32_rendered-sst2
@@ -0,0 +1,29 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ # pre-trained model
+ - clip-vit-base-patch32
+ # eight tasks in the task arithmetic paper
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_stanford-cars
+ - clip-vit-base-patch32_resisc45
+ - clip-vit-base-patch32_eurosat
+ - clip-vit-base-patch32_svhn
+ - clip-vit-base-patch32_gtsrb
+ - clip-vit-base-patch32_mnist
+ - clip-vit-base-patch32_dtd
+ # additional 6 tasks in the TALL mask paper (TALL 14)
+ - clip-vit-base-patch32_oxford_flowers102
+ - clip-vit-base-patch32_pcam
+ - clip-vit-base-patch32_fer2013
+ - clip-vit-base-patch32_oxford-iiit-pet
+ - clip-vit-base-patch32_stl10
+ - clip-vit-base-patch32_cifar100
+ # additional 6 tasks in the TALL mask paper (TALL 20)
+ - clip-vit-base-patch32_cifar10
+ - clip-vit-base-patch32_food101
+ # - clip-vit-base-patch32_fashion_mnist
+ # - clip-vit-base-patch32_emnist_letters
+ # - clip-vit-base-patch32_kmnist
+ # - clip-vit-base-patch32_rendered-sst2
@@ -0,0 +1,29 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ # pre-trained model
+ - clip-vit-base-patch32
+ # eight tasks in the task arithmetic paper
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_stanford-cars
+ - clip-vit-base-patch32_resisc45
+ - clip-vit-base-patch32_eurosat
+ - clip-vit-base-patch32_svhn
+ - clip-vit-base-patch32_gtsrb
+ - clip-vit-base-patch32_mnist
+ - clip-vit-base-patch32_dtd
+ # additional 6 tasks in the TALL mask paper (TALL 14)
+ - clip-vit-base-patch32_oxford_flowers102
+ - clip-vit-base-patch32_pcam
+ - clip-vit-base-patch32_fer2013
+ - clip-vit-base-patch32_oxford-iiit-pet
+ - clip-vit-base-patch32_stl10
+ - clip-vit-base-patch32_cifar100
+ # additional 6 tasks in the TALL mask paper (TALL 20)
+ - clip-vit-base-patch32_cifar10
+ - clip-vit-base-patch32_food101
+ - clip-vit-base-patch32_fashion_mnist
+ - clip-vit-base-patch32_emnist_letters
+ # - clip-vit-base-patch32_kmnist
+ # - clip-vit-base-patch32_rendered-sst2
@@ -0,0 +1,8 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ - CLIPVisionModelPool@: _template
+ - /model/clip-vit@models: clip-vit-base-patch32_TALL10
+ - /dataset/image_classification/train@train_datasets: TALL10
+ - /dataset/image_classification/test@test_datasets: TALL10
@@ -0,0 +1,8 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ - CLIPVisionModelPool@: _template
+ - /model/clip-vit@models: clip-vit-base-patch32_TALL12
+ - /dataset/image_classification/train@train_datasets: TALL12
+ - /dataset/image_classification/test@test_datasets: TALL12
@@ -0,0 +1,8 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ - CLIPVisionModelPool@: _template
+ - /model/clip-vit@models: clip-vit-base-patch32_TALL16
+ - /dataset/image_classification/train@train_datasets: TALL16
+ - /dataset/image_classification/test@test_datasets: TALL16
@@ -0,0 +1,8 @@
+ # The 20 task used in the paper:
+ # Wang et al. Localizing Task Information for Improved Model Merging and Compression
+ # http://arxiv.org/abs/2405.07813
+ defaults:
+ - CLIPVisionModelPool@: _template
+ - /model/clip-vit@models: clip-vit-base-patch32_TALL18
+ - /dataset/image_classification/train@train_datasets: TALL18
+ - /dataset/image_classification/test@test_datasets: TALL18
@@ -0,0 +1,16 @@
+ defaults:
+ - /model/clip-vit@models:
+ - clip-vit-base-patch32
+ - clip-vit-base-patch32_stanford-cars
+ - clip-vit-base-patch32_dtd
+ - /dataset/image_classification/train@train_datasets:
+ - stanford-cars
+ - dtd
+ - /dataset/image_classification/test@test_datasets:
+ - stanford-cars
+ - dtd
+ _target_: fusion_bench.modelpool.CLIPVisionModelPool
+ _recursive_: False
+ processor:
+ _target_: transformers.CLIPProcessor.from_pretrained
+ pretrained_model_name_or_path: openai/clip-vit-base-patch32
@@ -0,0 +1,16 @@
+ defaults:
+ - /model/clip-vit@models:
+ - clip-vit-base-patch32
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_stanford-cars
+ - /dataset/image_classification/train@train_datasets:
+ - sun397
+ - stanford-cars
+ - /dataset/image_classification/test@test_datasets:
+ - sun397
+ - stanford-cars
+ _target_: fusion_bench.modelpool.CLIPVisionModelPool
+ _recursive_: False
+ processor:
+ _target_: transformers.CLIPProcessor.from_pretrained
+ pretrained_model_name_or_path: openai/clip-vit-base-patch32
@@ -0,0 +1,16 @@
+ defaults:
+ - /model/clip-vit@models:
+ - clip-vit-base-patch32
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_dtd
+ - /dataset/image_classification/train@train_datasets:
+ - sun397
+ - dtd
+ - /dataset/image_classification/test@test_datasets:
+ - sun397
+ - dtd
+ _target_: fusion_bench.modelpool.CLIPVisionModelPool
+ _recursive_: False
+ processor:
+ _target_: transformers.CLIPProcessor.from_pretrained
+ pretrained_model_name_or_path: openai/clip-vit-base-patch32
@@ -0,0 +1,19 @@
+ defaults:
+ - /model/clip-vit@models:
+ - clip-vit-base-patch32
+ - clip-vit-base-patch32_sun397
+ - clip-vit-base-patch32_stanford-cars
+ - clip-vit-base-patch32_dtd
+ - /dataset/image_classification/train@train_datasets:
+ - sun397
+ - stanford-cars
+ - dtd
+ - /dataset/image_classification/test@test_datasets:
+ - sun397
+ - stanford-cars
+ - dtd
+ _target_: fusion_bench.modelpool.CLIPVisionModelPool
+ _recursive_: False
+ processor:
+ _target_: transformers.CLIPProcessor.from_pretrained
+ pretrained_model_name_or_path: openai/clip-vit-base-patch32
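
Each of the CLIP vision pools above carries a `processor` node that is instantiated through Hydra. As a brief, hedged sketch (not code from the package), building that node is equivalent to calling `CLIPProcessor.from_pretrained` directly:

```python
# Assumes transformers and hydra-core are installed; model files are fetched
# from the Hugging Face Hub on first use.
from hydra.utils import instantiate
from transformers import CLIPProcessor

processor = instantiate({
    "_target_": "transformers.CLIPProcessor.from_pretrained",
    "pretrained_model_name_or_path": "openai/clip-vit-base-patch32",
})
# Direct equivalent of the config node above:
same_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
```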
@@ -0,0 +1,15 @@
+ _target_: fusion_bench.modelpool.CausalLMPool
+
+ pretrained_model_name_or_path: deepseek-ai/DeepSeek-V2-Lite
+
+ models:
+ _pretrained_:
+ _target_: fusion_bench.models.modeling_deepseek_v2.DeepseekV2ForCausalLM.from_pretrained
+ pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+ torch_dtype: bfloat16
+ device_map: auto
+ trust_remote_code: true
+
+ tokenizer:
+ _target_: transformers.AutoTokenizer.from_pretrained
+ pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
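
The `${...pretrained_model_name_or_path}` and `${..pretrained_model_name_or_path}` values are OmegaConf relative interpolations: the first dot stays at the containing node and each additional dot climbs one level up the config tree, so both references resolve to the single top-level key and the base model name only needs to be written once. A minimal sketch of that resolution (plain OmegaConf behaviour, with the config trimmed to the relevant keys):

```python
# Plain OmegaConf sketch of the relative interpolations used in the pool config.
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    "pretrained_model_name_or_path": "deepseek-ai/DeepSeek-V2-Lite",
    "models": {
        "_pretrained_": {
            # three dots: containing node -> models -> root
            "pretrained_model_name_or_path": "${...pretrained_model_name_or_path}",
        },
    },
    "tokenizer": {
        # two dots: containing node -> root
        "pretrained_model_name_or_path": "${..pretrained_model_name_or_path}",
    },
})
print(cfg["models"]["_pretrained_"]["pretrained_model_name_or_path"])  # deepseek-ai/DeepSeek-V2-Lite
print(cfg["tokenizer"]["pretrained_model_name_or_path"])               # deepseek-ai/DeepSeek-V2-Lite
```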
@@ -0,0 +1,14 @@
+ _target_: fusion_bench.modelpool.CausalLMPool
+
+ pretrained_model_name_or_path: mistralai/Mixtral-8x7B-v0.1
+
+ models:
+ _pretrained_:
+ _target_: transformers.AutoModelForCausalLM.from_pretrained
+ pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
+ torch_dtype: bfloat16
+ device_map: auto
+
+ tokenizer:
+ _target_: transformers.AutoTokenizer.from_pretrained
+ pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
@@ -0,0 +1,69 @@
+ defaults:
+ - Seq2SeqLMPool@: _template
+ - /model/roberta@models:
+ - roberta_base
+ - roberta_glue-cola
+ - roberta_glue-mnli
+ - roberta_glue-mrpc
+ - roberta_glue-qnli
+ - roberta_glue-qqp
+ - roberta_glue-rte
+ - roberta_glue-sst2
+ - roberta_glue-stsb
+ # _target_: fusion_bench.modelpool.SequenceClassificationModelPool
+ # _recursive_: false
+
+ _dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset
+ test_datasets:
+ glue-cola:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: cola
+ tokenizer: ${...tokenizer}
+ split: validation
+ glue-mnli:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: mnli
+ tokenizer: ${...tokenizer}
+ split: validation_matched
+ glue-mrpc:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: mrpc
+ tokenizer: ${...tokenizer}
+ split: validation
+ glue-qnli:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: qnli
+ tokenizer: ${...tokenizer}
+ split: validation
+ glue-qqp:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: qqp
+ tokenizer: ${...tokenizer}
+ split: validation
+ glue-rte:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: rte
+ tokenizer: ${...tokenizer}
+ split: validation
+ glue-sst2:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: sst2
+ tokenizer: ${...tokenizer}
+ split: validation
+ glue-stsb:
+ _target_: ${..._dataset_loader}
+ _recursive_: false
+ name: stsb
+ tokenizer: ${...tokenizer}
+ split: validation
+
+ tokenizer:
+ _target_: transformers.AutoTokenizer.from_pretrained
+ pretrained_model_name_or_path: roberta-base
@@ -1,5 +1,4 @@
  _target_: fusion_bench.taskpool.LMEvalHarnessTaskPool
-
  tasks:
  - truthfulqa
  batch_size: 1