fusion-bench 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (209)
  1. fusion_bench/compat/method/__init__.py +2 -0
  2. fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
  3. fusion_bench/constants/clip_vision.py +22 -0
  4. fusion_bench/dataset/clip_dataset.py +10 -2
  5. fusion_bench/dataset/fer2013.py +1 -0
  6. fusion_bench/dataset/gsm8k.py +2 -2
  7. fusion_bench/method/__init__.py +10 -0
  8. fusion_bench/method/ada_svd/clip_vision.py +4 -1
  9. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
  10. fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
  11. fusion_bench/method/gossip/__init__.py +3 -0
  12. fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
  13. fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
  14. fusion_bench/method/gossip/entropy_loss.py +25 -0
  15. fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
  16. fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
  17. fusion_bench/method/gossip/min_norm_solvers.py +227 -0
  18. fusion_bench/method/gossip/task_wise_gossip.py +265 -0
  19. fusion_bench/method/gossip/utils.py +74 -0
  20. fusion_bench/method/isotropic_merging/__init__.py +1 -1
  21. fusion_bench/method/opcm/opcm.py +16 -7
  22. fusion_bench/method/pwe_moe/module.py +1 -1
  23. fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
  24. fusion_bench/method/regmean/regmean.py +25 -17
  25. fusion_bench/method/smile_upscaling/__init__.py +1 -1
  26. fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +46 -145
  27. fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +229 -0
  28. fusion_bench/method/smile_upscaling/smile_upscaling.py +19 -346
  29. fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
  30. fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
  31. fusion_bench/method/ties_merging/ties_merging.py +36 -31
  32. fusion_bench/method/we_moe/we_moe.py +14 -15
  33. fusion_bench/mixins/__init__.py +6 -3
  34. fusion_bench/mixins/hydra_config.py +49 -0
  35. fusion_bench/mixins/openclip_classification.py +11 -0
  36. fusion_bench/mixins/simple_profiler.py +4 -2
  37. fusion_bench/modelpool/__init__.py +3 -1
  38. fusion_bench/modelpool/base_pool.py +2 -2
  39. fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
  40. fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
  41. fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +2 -203
  42. fusion_bench/models/modeling_smile_qwen2/__init__.py +8 -0
  43. fusion_bench/models/modeling_smile_qwen2/configuration_smile_qwen2.py +21 -0
  44. fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +922 -0
  45. fusion_bench/models/modeling_smile_qwen2/register.py +11 -0
  46. fusion_bench/models/open_clip/__init__.py +6 -0
  47. fusion_bench/models/open_clip/modeling.py +176 -0
  48. fusion_bench/models/open_clip/utils.py +311 -0
  49. fusion_bench/models/open_clip/variables_and_paths.py +56 -0
  50. fusion_bench/models/parameter_dict.py +54 -13
  51. fusion_bench/models/rankone_moe.py +2 -88
  52. fusion_bench/models/smile_moe/linear_from_hf_config.py +373 -0
  53. fusion_bench/models/smile_moe/{linear.py → linear_from_module.py} +103 -33
  54. fusion_bench/models/smile_moe/utils/__init__.py +24 -0
  55. fusion_bench/models/smile_moe/utils/svd_utils.py +46 -0
  56. fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
  57. fusion_bench/taskpool/__init__.py +7 -3
  58. fusion_bench/taskpool/clip_vision/__init__.py +1 -0
  59. fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
  60. fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
  61. fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
  62. fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
  63. fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
  64. fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
  65. fusion_bench/taskpool/gpt2_text_classification.py +30 -1
  66. fusion_bench/taskpool/lm_eval_harness/__init__.py +3 -0
  67. fusion_bench/taskpool/lm_eval_harness/taskpool.py +87 -0
  68. fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
  69. fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
  70. fusion_bench/utils/data.py +12 -0
  71. fusion_bench/utils/devices.py +14 -0
  72. fusion_bench/utils/instantiate.py +12 -0
  73. fusion_bench/utils/misc.py +9 -2
  74. fusion_bench/utils/packages.py +14 -0
  75. fusion_bench/utils/parameters.py +1 -1
  76. fusion_bench/utils/tensorboard.py +1 -1
  77. {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/METADATA +22 -2
  78. {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/RECORD +209 -157
  79. {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/WHEEL +1 -1
  80. fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
  81. fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
  82. fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
  83. fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
  84. fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
  85. fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
  86. fusion_bench_config/fabric/auto.yaml +0 -1
  87. fusion_bench_config/fabric/llama_ddp.yaml +0 -1
  88. fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
  89. fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
  90. fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
  91. fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
  92. fusion_bench_config/fabric_model_fusion.yaml +0 -1
  93. fusion_bench_config/llama_full_finetune.yaml +0 -2
  94. fusion_bench_config/llama_model_fusion.yaml +0 -2
  95. fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
  96. fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
  97. fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
  98. fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
  99. fusion_bench_config/method/adamerging.yaml +2 -2
  100. fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
  101. fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
  102. fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
  103. fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
  104. fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
  105. fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
  106. fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
  107. fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
  108. fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
  109. fusion_bench_config/method/dare/simple_average.yaml +0 -1
  110. fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
  111. fusion_bench_config/method/dare/ties_merging.yaml +0 -2
  112. fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
  113. fusion_bench_config/method/doge_ta/doge_ta.yaml +1 -1
  114. fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
  115. fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
  116. fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
  117. fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
  118. fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
  119. fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
  120. fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
  121. fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
  122. fusion_bench_config/method/linear/llama_expo.yaml +0 -3
  123. fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
  124. fusion_bench_config/method/linear/weighted_average.yaml +0 -1
  125. fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
  126. fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
  127. fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
  128. fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
  129. fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
  130. fusion_bench_config/method/model_recombination.yaml +0 -1
  131. fusion_bench_config/method/opcm/opcm.yaml +0 -1
  132. fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
  133. fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
  134. fusion_bench_config/method/opcm/weight_average.yaml +0 -1
  135. fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
  136. fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
  137. fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
  138. fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
  139. fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
  140. fusion_bench_config/method/slerp/slerp.yaml +0 -2
  141. fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -2
  142. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +13 -0
  143. fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
  144. fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
  145. fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
  146. fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
  147. fusion_bench_config/method/task_arithmetic.yaml +1 -1
  148. fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
  149. fusion_bench_config/method/ties_merging.yaml +1 -1
  150. fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
  151. fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
  152. fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
  153. fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
  154. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
  155. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
  156. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
  157. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
  158. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
  159. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
  160. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
  161. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
  162. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
  163. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
  164. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
  165. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
  166. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
  167. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
  168. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
  169. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
  170. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
  171. fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
  172. fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
  173. fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
  174. fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
  175. fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
  176. fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +17 -0
  177. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
  178. fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
  179. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
  180. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
  181. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
  182. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
  183. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
  184. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
  185. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
  186. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
  187. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
  188. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
  189. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +0 -2
  190. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +1 -3
  191. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
  192. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
  193. fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
  194. fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
  195. fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
  196. fusion_bench_config/nyuv2_config.yaml +0 -2
  197. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
  198. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
  199. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
  200. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
  201. fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml +12 -0
  202. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
  203. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
  204. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
  205. fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
  206. fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
  207. {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/entry_points.txt +0 -0
  208. {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/licenses/LICENSE +0 -0
  209. {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.14.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ models:
13
13
  expert_3:
14
14
  _target_: transformers.AutoModelForCausalLM.from_pretrained
15
15
  pretrained_model_name_or_path: uukuguy/speechless-code-mistral-7b-v1.0
16
-
17
16
  model_kwargs:
18
17
  torch_dtype: float16
19
18
  tokenizer:
@@ -6,12 +6,9 @@ models:
6
6
  _pretrained_:
7
7
  _target_: transformers.LlamaForCausalLM.from_pretrained
8
8
  pretrained_model_name_or_path: ${...base_model}
9
-
10
9
  model_kwargs:
11
10
  torch_dtype: float16
12
-
13
11
  tokenizer:
14
12
  _target_: transformers.AutoTokenizer.from_pretrained
15
13
  pretrained_model_name_or_path: ${..base_model}
16
-
17
14
  base_model: decapoda-research/llama-7b-hf
@@ -0,0 +1,90 @@
1
+ # OpenCLIPVisionModelPool
2
+
3
+ This is a model pool for OpenCLIP Vision models.
4
+
5
+ ## Usage
6
+
7
+ By default, the model checkpoints are placed in the `.cache/task_vectors_checkpoints` directory.
8
+
9
+ ```
10
+ .cache/
11
+ ├── task_vectors_checkpoints/
12
+ │ ├── ViT-B-16
13
+ │ │ ├── Cars/finetuned.pt
14
+ │ │ ├── DTD/finetuned.pt
15
+ │ │ ├── ...
16
+ │ ├── ViT-B-32
17
+ │ │ ├── Cars/finetuned.pt
18
+ │ │ ├── DTD/finetuned.pt
19
+ │ │ ├── ...
20
+ │ ├── ...
21
+ │ ├── head_Cars.pt
22
+ │ ├── head_DTD.pt
23
+ │ ├── ...
24
+ │ └── zeroshot.pt
25
+ └── ...
26
+ ```
27
+
28
+ ## Model Configuration
29
+
30
+ The model pool supports several formats for model configuration:
31
+
32
+ 1. **Direct Path (String)**:
33
+ - A string path to a model checkpoint in pickle format
34
+ - Example: `"path/to/model.pt"`
35
+
36
+ 2. **Pickle Path Configuration**:
37
+ ```yaml
38
+ model_name: "ViT-B-16" # Name of the model
39
+ pickle_path: "path/to/model.pt" # Path to pickle file
40
+ ```
41
+
42
+ 3. **State Dict Configuration**:
43
+ ```yaml
44
+ model_name: "ViT-B-16" # Name of the model
45
+ state_dict_path: "path/to/state_dict.pt" # Path to state dict file
46
+ ```
47
+
48
+ 4. **Hydra Configuration**:
49
+ - Any configuration that can be instantiated using Hydra's `instantiate`
50
+
51
+ ## Classification Head Configuration
52
+
53
+ The classification heads can be configured in two ways:
54
+
55
+ 1. **Direct Path (String)**:
56
+ - A string path to a classification head checkpoint in pickle format
57
+ - Example: `"path/to/head.pt"`
58
+
59
+ 2. **Hydra Configuration**:
60
+ - Any configuration that can be instantiated using Hydra's `instantiate`
61
+
62
+ ## Dataset Configuration
63
+
64
+ The model pool supports loading datasets in two ways:
65
+
66
+ 1. **Direct Dataset Name (String)**:
67
+ - A string identifier that can be loaded using `datasets.load_dataset`
68
+ - Example: `"cifar10"`
69
+
70
+ 2. **Custom Configuration**:
71
+ - Any custom dataset configuration that can be handled by the parent class
72
+
73
+ ## Example Configuration
74
+
75
+ Here's an example of a complete configuration:
76
+
77
+ ```yaml
78
+ models:
79
+ vit_b16:
80
+ model_name: "ViT-B-16"
81
+ pickle_path: ".cache/task_vectors_checkpoints/ViT-B-16/Cars/finetuned.pt"
82
+ vit_b32:
83
+ model_name: "ViT-B-32"
84
+ state_dict_path: ".cache/task_vectors_checkpoints/ViT-B-32/DTD/finetuned.pt"
85
+
86
+ classification_heads:
87
+ cars_head: ".cache/task_vectors_checkpoints/head_Cars.pt"
88
+ dtd_head: ".cache/task_vectors_checkpoints/head_DTD.pt"
89
+ ```
90
+
@@ -0,0 +1,27 @@
1
+ defaults:
2
+ - /dataset/image_classification/train@train_datasets: the_eight_tasks
3
+ - /dataset/image_classification/test@test_datasets: the_eight_tasks
4
+ - _self_
5
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
6
+ _recursive_: false
7
+ # path of the checkpoint directory
8
+ model_dir: ./.cache/task_vectors_checkpoints/
9
+ models:
10
+ _pretrained_: ${..model_dir}/ViT-B-16/zeroshot.pt
11
+ sun397: ${..model_dir}/ViT-B-16/SUN397/finetuned.pt
12
+ stanford-cars: ${..model_dir}/ViT-B-16/Cars/finetuned.pt
13
+ resisc45: ${..model_dir}/ViT-B-16/RESISC45/finetuned.pt
14
+ eurosat: ${..model_dir}/ViT-B-16/EuroSAT/finetuned.pt
15
+ svhn: ${..model_dir}/ViT-B-16/SVHN/finetuned.pt
16
+ gtsrb: ${..model_dir}/ViT-B-16/GTSRB/finetuned.pt
17
+ mnist: ${..model_dir}/ViT-B-16/MNIST/finetuned.pt
18
+ dtd: ${..model_dir}/ViT-B-16/DTD/finetuned.pt
19
+ classification_heads:
20
+ sun397: ${..model_dir}/ViT-B-16/head_SUN397.pt
21
+ stanford-cars: ${..model_dir}/ViT-B-16/head_Cars.pt
22
+ resisc45: ${..model_dir}/ViT-B-16/head_RESISC45.pt
23
+ eurosat: ${..model_dir}/ViT-B-16/head_EuroSAT.pt
24
+ svhn: ${..model_dir}/ViT-B-16/head_SVHN.pt
25
+ gtsrb: ${..model_dir}/ViT-B-16/head_GTSRB.pt
26
+ mnist: ${..model_dir}/ViT-B-16/head_MNIST.pt
27
+ dtd: ${..model_dir}/ViT-B-16/head_DTD.pt
@@ -0,0 +1,45 @@
1
+ defaults:
2
+ - /dataset/image_classification/train@train_datasets: the_eight_tasks
3
+ - /dataset/image_classification/test@test_datasets: the_eight_tasks
4
+ - _self_
5
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
6
+ _recursive_: false
7
+ # path of the checkpoint directory
8
+ model_dir: ./.cache/task_vectors_checkpoints/
9
+ models:
10
+ _pretrained_:
11
+ model_name: ViT-B-32
12
+ pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
13
+ sun397:
14
+ model_name: ViT-B-32
15
+ pickle_path: ${...model_dir}/ViT-B-32/SUN397/finetuned.pt
16
+ stanford-cars:
17
+ model_name: ViT-B-32
18
+ pickle_path: ${...model_dir}/ViT-B-32/Cars/finetuned.pt
19
+ resisc45:
20
+ model_name: ViT-B-32
21
+ pickle_path: ${...model_dir}/ViT-B-32/RESISC45/finetuned.pt
22
+ eurosat:
23
+ model_name: ViT-B-32
24
+ pickle_path: ${...model_dir}/ViT-B-32/EuroSAT/finetuned.pt
25
+ svhn:
26
+ model_name: ViT-B-32
27
+ pickle_path: ${...model_dir}/ViT-B-32/SVHN/finetuned.pt
28
+ gtsrb:
29
+ model_name: ViT-B-32
30
+ pickle_path: ${...model_dir}/ViT-B-32/GTSRB/finetuned.pt
31
+ mnist:
32
+ model_name: ViT-B-32
33
+ pickle_path: ${...model_dir}/ViT-B-32/MNIST/finetuned.pt
34
+ dtd:
35
+ model_name: ViT-B-32
36
+ pickle_path: ${...model_dir}/ViT-B-32/DTD/finetuned.pt
37
+ classification_heads:
38
+ sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
39
+ stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
40
+ resisc45: ${..model_dir}/ViT-B-32/head_RESISC45.pt
41
+ eurosat: ${..model_dir}/ViT-B-32/head_EuroSAT.pt
42
+ svhn: ${..model_dir}/ViT-B-32/head_SVHN.pt
43
+ gtsrb: ${..model_dir}/ViT-B-32/head_GTSRB.pt
44
+ mnist: ${..model_dir}/ViT-B-32/head_MNIST.pt
45
+ dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
@@ -0,0 +1,23 @@
1
+ defaults:
2
+ - /dataset/image_classification/train@train_datasets:
3
+ - stanford-cars
4
+ - dtd
5
+ - /dataset/image_classification/test@test_datasets:
6
+ - stanford-cars
7
+ - dtd
8
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
9
+ _recursive_: false
10
+ model_dir: ./.cache/task_vectors_checkpoints/
11
+ models:
12
+ _pretrained_:
13
+ model_name: ViT-B-32
14
+ pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
15
+ stanford-cars:
16
+ model_name: ViT-B-32
17
+ pickle_path: ${...model_dir}/ViT-B-32/Cars/finetuned.pt
18
+ dtd:
19
+ model_name: ViT-B-32
20
+ pickle_path: ${...model_dir}/ViT-B-32/DTD/finetuned.pt
21
+ classification_heads:
22
+ stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
23
+ dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
@@ -0,0 +1,23 @@
1
+ defaults:
2
+ - /dataset/image_classification/train@train_datasets:
3
+ - sun397
4
+ - stanford-cars
5
+ - /dataset/image_classification/test@test_datasets:
6
+ - sun397
7
+ - stanford-cars
8
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
9
+ _recursive_: false
10
+ model_dir: ./.cache/task_vectors_checkpoints/
11
+ models:
12
+ _pretrained_:
13
+ model_name: ViT-B-32
14
+ pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
15
+ sun397:
16
+ model_name: ViT-B-32
17
+ pickle_path: ${...model_dir}/ViT-B-32/SUN397/finetuned.pt
18
+ stanford-cars:
19
+ model_name: ViT-B-32
20
+ pickle_path: ${...model_dir}/ViT-B-32/Cars/finetuned.pt
21
+ classification_heads:
22
+ sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
23
+ stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
@@ -0,0 +1,23 @@
1
+ defaults:
2
+ - /dataset/image_classification/train@train_datasets:
3
+ - sun397
4
+ - dtd
5
+ - /dataset/image_classification/test@test_datasets:
6
+ - sun397
7
+ - dtd
8
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
9
+ _recursive_: false
10
+ model_dir: ./.cache/task_vectors_checkpoints/
11
+ models:
12
+ _pretrained_:
13
+ model_name: ViT-B-32
14
+ pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
15
+ sun397:
16
+ model_name: ViT-B-32
17
+ pickle_path: ${...model_dir}/ViT-B-32/SUN397/finetuned.pt
18
+ dtd:
19
+ model_name: ViT-B-32
20
+ pickle_path: ${...model_dir}/ViT-B-32/DTD/finetuned.pt
21
+ classification_heads:
22
+ sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
23
+ dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
@@ -0,0 +1,7 @@
1
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
2
+ _recursive_: false
3
+ model_dir: ./.cache/task_vectors_checkpoints/
4
+ models:
5
+ _pretrained_:
6
+ model_name: ViT-B-32
7
+ pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
@@ -0,0 +1,26 @@
1
+ defaults:
2
+ - /dataset/image_classification/train@train_datasets: the_eight_tasks
3
+ - /dataset/image_classification/test@test_datasets: the_eight_tasks
4
+ - _self_
5
+ _target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
6
+ _recursive_: false
7
+ model_dir: ./.cache/task_vectors_checkpoints/
8
+ models:
9
+ _pretrained_: ${..model_dir}/ViT-L-14/zeroshot.pt
10
+ sun397: ${..model_dir}/ViT-L-14/SUN397/finetuned.pt
11
+ stanford-cars: ${..model_dir}/ViT-L-14/Cars/finetuned.pt
12
+ resisc45: ${..model_dir}/ViT-L-14/RESISC45/finetuned.pt
13
+ eurosat: ${..model_dir}/ViT-L-14/EuroSAT/finetuned.pt
14
+ svhn: ${..model_dir}/ViT-L-14/SVHN/finetuned.pt
15
+ gtsrb: ${..model_dir}/ViT-L-14/GTSRB/finetuned.pt
16
+ mnist: ${..model_dir}/ViT-L-14/MNIST/finetuned.pt
17
+ dtd: ${..model_dir}/ViT-L-14/DTD/finetuned.pt
18
+ classification_heads:
19
+ sun397: ${..model_dir}/ViT-L-14/head_SUN397.pt
20
+ stanford-cars: ${..model_dir}/ViT-L-14/head_Cars.pt
21
+ resisc45: ${..model_dir}/ViT-L-14/head_RESISC45.pt
22
+ eurosat: ${..model_dir}/ViT-L-14/head_EuroSAT.pt
23
+ svhn: ${..model_dir}/ViT-L-14/head_SVHN.pt
24
+ gtsrb: ${..model_dir}/ViT-L-14/head_GTSRB.pt
25
+ mnist: ${..model_dir}/ViT-L-14/head_MNIST.pt
26
+ dtd: ${..model_dir}/ViT-L-14/head_DTD.pt
@@ -10,4 +10,3 @@ defaults:
10
10
  - flan-t5-base_glue-rte
11
11
  - flan-t5-base_glue-sst2
12
12
  - flan-t5-base_glue-stsb
13
-
@@ -1,6 +1,5 @@
1
1
  defaults:
2
2
  - Seq2SeqLMPool@: _template
3
-
4
3
  models:
5
4
  _pretrained_:
6
5
  _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained
@@ -37,5 +36,4 @@ models:
37
36
  _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model
38
37
  base_model_path: ${...base_model}
39
38
  peft_model_path: tanganke/flan-t5-base_glue-stsb_lora-16
40
-
41
39
  base_model: google/flan-t5-base
@@ -11,7 +11,6 @@ defaults:
11
11
  - flan-t5-base_glue-stsb_lora-16
12
12
  _target_: fusion_bench.modelpool.Seq2SeqLMPool
13
13
  _recursive_: false
14
-
15
14
  _dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset
16
15
  test_datasets:
17
16
  glue-cola:
@@ -62,7 +61,6 @@ test_datasets:
62
61
  name: stsb
63
62
  tokenizer: ${...tokenizer}
64
63
  split: validation
65
-
66
64
  tokenizer:
67
65
  _target_: transformers.AutoTokenizer.from_pretrained
68
66
  pretrained_model_name_or_path: google/flan-t5-base
@@ -11,7 +11,6 @@ defaults:
11
11
  - flan-t5-base_glue-stsb
12
12
  _target_: fusion_bench.modelpool.Seq2SeqLMPool
13
13
  _recursive_: false
14
-
15
14
  _dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset
16
15
  test_datasets:
17
16
  glue-cola:
@@ -62,7 +61,6 @@ test_datasets:
62
61
  name: stsb
63
62
  tokenizer: ${...tokenizer}
64
63
  split: validation
65
-
66
64
  tokenizer:
67
65
  _target_: transformers.AutoTokenizer.from_pretrained
68
- pretrained_model_name_or_path: google/flan-t5-base
66
+ pretrained_model_name_or_path: google/flan-t5-base
@@ -1,6 +1,5 @@
1
1
  defaults:
2
2
  - Seq2SeqLMPool@: _template
3
-
4
3
  models:
5
4
  _pretrained_:
6
5
  _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained
@@ -1,6 +1,5 @@
1
1
  defaults:
2
2
  - Seq2SeqLMPool@: _template
3
-
4
3
  models:
5
4
  _pretrained_:
6
5
  _target_: transformers.AutoModelForSeq2SeqLM.from_pretrained
@@ -37,9 +36,7 @@ models:
37
36
  _target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model
38
37
  base_model_path: ${...base_model}
39
38
  peft_model_path: tanganke/flan-t5-large_glue-stsb_lora-16
40
-
41
39
  tokenizer:
42
40
  _target_: transformers.AutoTokenizer.from_pretrained
43
41
  pretrained_model_name_or_path: ${..base_model}
44
-
45
42
  base_model: google/flan-t5-large
@@ -1,19 +1,15 @@
1
1
  _target_: fusion_bench.modelpool.SeqenceClassificationModelPool
2
-
3
2
  pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
4
-
5
3
  models:
6
4
  _pretrained_:
7
5
  _target_: fusion_bench.modelpool.seq_classification_lm.create_reward_model_from_pretrained
8
6
  pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
9
7
  torch_dtype: bfloat16
10
8
  use_flash_attention_2: true
11
-
12
9
  tokenizer:
13
10
  _target_: transformers.AutoTokenizer.from_pretrained
14
11
  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
15
12
  pad_token: <|end_of_text|> # do not use eos token (<|eos_id|>) as padding token because it is used as the end of each content
16
-
17
13
  train_datasets:
18
14
  preference_700k:
19
15
  _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
@@ -1,13 +1,10 @@
1
1
  _target_: fusion_bench.modelpool.SeqenceClassificationModelPool
2
-
3
2
  pretrained_model_name_or_path: fusion-bench/Llama-3.2-1B-Instruct_Bradly-Terry-RM_Preference-700k
4
-
5
3
  models:
6
4
  _pretrained_:
7
5
  _target_: transformers.AutoModelForSequenceClassification.from_pretrained
8
6
  pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
9
7
  torch_dtype: bfloat16
10
-
11
8
  tokenizer:
12
9
  _target_: transformers.AutoTokenizer.from_pretrained
13
10
  pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
@@ -1,5 +1,4 @@
1
1
  _target_: fusion_bench.modelpool.HuggingFaceGPT2ClassificationPool
2
-
3
2
  _model_loader: transformers.GPT2Model.from_pretrained
4
3
  models:
5
4
  _pretrained_:
@@ -26,7 +25,6 @@ models:
26
25
  sst2:
27
26
  _target_: ${..._model_loader}
28
27
  pretrained_model_name_or_path: tanganke/gpt2_sst2
29
-
30
28
  # train datasets for RegMean, Fisher Merging ...
31
29
  _dataset_loader: fusion_bench.modelpool.huggingface_gpt2_classification.load_gpt2_dataset
32
30
  train_datasets:
@@ -58,7 +56,6 @@ train_datasets:
58
56
  _target_: ${..._dataset_loader}
59
57
  name: sst2
60
58
  split: train
61
-
62
59
  tokenizer:
63
60
  _target_: fusion_bench.modelpool.huggingface_gpt2_classification.load_gpt2_tokenizer
64
61
  pretrained_model_name_or_path: gpt2
@@ -5,10 +5,8 @@ defaults:
5
5
  - method: simple_average
6
6
  - taskpool: nyuv2_taskpool
7
7
  - _self_
8
-
9
8
  _target_: fusion_bench.programs.FabricModelFusionProgram
10
9
  _recursive_: false
11
-
12
10
  fast_dev_run: false # Run a single batch of data to test the model or method
13
11
  use_lightning: true # Use the fabric to run the experiment
14
12
  print_config: true # Print the configuration to the console
@@ -2,10 +2,8 @@
2
2
  #
3
3
  # defaults:
4
4
  # - CLIPVisionModelTaskPool@: _template
5
-
6
5
  _target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
7
6
  _recursive_: false
8
-
9
7
  test_datasets: ??? # The datasets to evaluate the model on
10
8
  base_model: openai/clip-vit-base-patch32
11
9
  clip_model:
@@ -21,7 +19,6 @@ dataloader_kwargs:
21
19
  pin_memory: True # Whether to pin memory in data loader
22
20
  drop_last: False # Whether to drop the last incomplete batch
23
21
  shuffle: False # Whether to shuffle the data
24
-
25
22
  # === layer-wise feature saving ===
26
23
  # The path to save the features to, if none then the features are not saved
27
24
  # This is the path to a directory, the features of task `task_name` will be saved in `feature_save_path/task_name.csv`
@@ -9,10 +9,8 @@ defaults:
9
9
  - gtsrb
10
10
  - mnist
11
11
  - dtd
12
-
13
12
  _target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
14
13
  _recursive_: false
15
-
16
14
  test_datasets: ???
17
15
  base_model: openai/clip-vit-base-patch16
18
16
  clip_model:
@@ -10,9 +10,7 @@ defaults:
10
10
  - mnist
11
11
  - dtd
12
12
  - _self_
13
-
14
13
  _target_: fusion_bench.taskpool.RankoneWEMoECLIPVisionModelTaskPool
15
-
16
14
  # === layer-wise routing weights saving ===
17
15
  layer_wise_routing_weights_save_path: null
18
16
  layer_wise_routing_weights_max_num: 1000
@@ -10,9 +10,7 @@ defaults:
10
10
  - mnist
11
11
  - dtd
12
12
  - _self_
13
-
14
13
  _target_: fusion_bench.taskpool.SparseWEMoECLIPVisionModelTaskPool
15
-
16
14
  # === layer-wise routing weights saving ===
17
15
  layer_wise_routing_weights_save_path: null
18
16
  layer_wise_routing_weights_max_num: 1000
@@ -0,0 +1,12 @@
1
+ _target_: fusion_bench.taskpool.LMEvalHarnessTaskPool
2
+
3
+ tasks:
4
+ - truthfulqa
5
+ batch_size: 1
6
+ verbosity: null
7
+ include_path: null
8
+ apply_chat_template: false
9
+ # if `output_path` is not given, the results will be saved to `log_dir/lm_eval_results`, where `log_dir` is the directory controlled by lightning Fabric.
10
+ output_path: null
11
+ # if `log_samples` is true, the samples will be saved to `output_path`.
12
+ log_samples: false
@@ -0,0 +1,24 @@
1
+ defaults:
2
+ - /dataset/image_classification/test@test_datasets: the_eight_tasks
3
+ - _self_
4
+ _target_: fusion_bench.taskpool.OpenCLIPVisionModelTaskPool
5
+ _recursive_: false
6
+ # name of the base model
7
+ model_name: ViT-B-16
8
+ # path of the checkpoint directory
9
+ model_dir: ./.cache/task_vectors_checkpoints/
10
+ classification_heads:
11
+ sun397: ${..model_dir}/ViT-B-16/head_SUN397.pt
12
+ stanford-cars: ${..model_dir}/ViT-B-16/head_Cars.pt
13
+ resisc45: ${..model_dir}/ViT-B-16/head_RESISC45.pt
14
+ eurosat: ${..model_dir}/ViT-B-16/head_EuroSAT.pt
15
+ svhn: ${..model_dir}/ViT-B-16/head_SVHN.pt
16
+ gtsrb: ${..model_dir}/ViT-B-16/head_GTSRB.pt
17
+ mnist: ${..model_dir}/ViT-B-16/head_MNIST.pt
18
+ dtd: ${..model_dir}/ViT-B-16/head_DTD.pt
19
+ dataloader_kwargs:
20
+ batch_size: 128 # The batch size for the data loader
21
+ num_workers: 8 # The number of worker processes for data loading
22
+ pin_memory: True # Whether to pin memory in data loader
23
+ drop_last: False # Whether to drop the last incomplete batch
24
+ shuffle: False # Whether to shuffle the data
@@ -0,0 +1,24 @@
1
+ defaults:
2
+ - /dataset/image_classification/test@test_datasets: the_eight_tasks
3
+ - _self_
4
+ _target_: fusion_bench.taskpool.OpenCLIPVisionModelTaskPool
5
+ _recursive_: false
6
+ # name of the base model
7
+ model_name: ViT-B-32
8
+ # path of the checkpoint directory
9
+ model_dir: ./.cache/task_vectors_checkpoints/
10
+ classification_heads:
11
+ sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
12
+ stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
13
+ resisc45: ${..model_dir}/ViT-B-32/head_RESISC45.pt
14
+ eurosat: ${..model_dir}/ViT-B-32/head_EuroSAT.pt
15
+ svhn: ${..model_dir}/ViT-B-32/head_SVHN.pt
16
+ gtsrb: ${..model_dir}/ViT-B-32/head_GTSRB.pt
17
+ mnist: ${..model_dir}/ViT-B-32/head_MNIST.pt
18
+ dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
19
+ dataloader_kwargs:
20
+ batch_size: 128 # The batch size for the data loader
21
+ num_workers: 8 # The number of worker processes for data loading
22
+ pin_memory: True # Whether to pin memory in data loader
23
+ drop_last: False # Whether to drop the last incomplete batch
24
+ shuffle: False # Whether to shuffle the data
@@ -0,0 +1,24 @@
1
+ defaults:
2
+ - /dataset/image_classification/test@test_datasets: the_eight_tasks
3
+ - _self_
4
+ _target_: fusion_bench.taskpool.OpenCLIPVisionModelTaskPool
5
+ _recursive_: false
6
+ # name of the base model
7
+ model_name: ViT-L-14
8
+ # path of the checkpoint directory
9
+ model_dir: ./.cache/task_vectors_checkpoints/
10
+ classification_heads:
11
+ sun397: ${..model_dir}/ViT-L-14/head_SUN397.pt
12
+ stanford-cars: ${..model_dir}/ViT-L-14/head_Cars.pt
13
+ resisc45: ${..model_dir}/ViT-L-14/head_RESISC45.pt
14
+ eurosat: ${..model_dir}/ViT-L-14/head_EuroSAT.pt
15
+ svhn: ${..model_dir}/ViT-L-14/head_SVHN.pt
16
+ gtsrb: ${..model_dir}/ViT-L-14/head_GTSRB.pt
17
+ mnist: ${..model_dir}/ViT-L-14/head_MNIST.pt
18
+ dtd: ${..model_dir}/ViT-L-14/head_DTD.pt
19
+ dataloader_kwargs:
20
+ batch_size: 128 # The batch size for the data loader
21
+ num_workers: 8 # The number of worker processes for data loading
22
+ pin_memory: True # Whether to pin memory in data loader
23
+ drop_last: False # Whether to drop the last incomplete batch
24
+ shuffle: False # Whether to shuffle the data
@@ -1,5 +1,4 @@
1
1
  _target_: fusion_bench.taskpool.GPT2TextClassificationTaskPool
2
-
3
2
  _dataset_loader: fusion_bench.taskpool.gpt2_text_classification.load_gpt2_dataset
4
3
  test_datasets:
5
4
  cola:
@@ -1,5 +1,4 @@
1
1
  _target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool
2
-
3
2
  test_datasets:
4
3
  preference_700k:
5
4
  _target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
@@ -7,12 +6,9 @@ test_datasets:
7
6
  path: hendrydong/preference_700K
8
7
  split: train
9
8
  cache_path: null
10
-
11
9
  dataloader_kwargs:
12
10
  shuffle: False
13
11
  batch_size: 16
14
-
15
12
  tokenizer: ${..modelpool.tokenizer}
16
-
17
13
  max_num_samples: 1000
18
14
  seed: 42