fusion-bench 0.2.12__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/compat/method/__init__.py +2 -0
- fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
- fusion_bench/constants/clip_vision.py +22 -0
- fusion_bench/dataset/clip_dataset.py +10 -2
- fusion_bench/dataset/fer2013.py +1 -0
- fusion_bench/dataset/gsm8k.py +2 -2
- fusion_bench/method/__init__.py +10 -0
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
- fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
- fusion_bench/method/gossip/__init__.py +3 -0
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
- fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
- fusion_bench/method/gossip/entropy_loss.py +25 -0
- fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
- fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
- fusion_bench/method/gossip/min_norm_solvers.py +227 -0
- fusion_bench/method/gossip/task_wise_gossip.py +265 -0
- fusion_bench/method/gossip/utils.py +74 -0
- fusion_bench/method/isotropic_merging/__init__.py +1 -1
- fusion_bench/method/opcm/opcm.py +16 -7
- fusion_bench/method/pwe_moe/module.py +1 -1
- fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
- fusion_bench/method/regmean/regmean.py +25 -17
- fusion_bench/method/smile_upscaling/__init__.py +1 -1
- fusion_bench/method/smile_upscaling/smile_upscaling.py +13 -10
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
- fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
- fusion_bench/method/ties_merging/ties_merging.py +36 -31
- fusion_bench/method/we_moe/we_moe.py +14 -15
- fusion_bench/mixins/__init__.py +6 -3
- fusion_bench/mixins/hydra_config.py +49 -0
- fusion_bench/mixins/openclip_classification.py +11 -0
- fusion_bench/mixins/simple_profiler.py +4 -2
- fusion_bench/modelpool/__init__.py +3 -1
- fusion_bench/modelpool/base_pool.py +2 -2
- fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
- fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
- fusion_bench/models/open_clip/__init__.py +6 -0
- fusion_bench/models/open_clip/modeling.py +176 -0
- fusion_bench/models/open_clip/utils.py +311 -0
- fusion_bench/models/open_clip/variables_and_paths.py +56 -0
- fusion_bench/models/parameter_dict.py +54 -13
- fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
- fusion_bench/taskpool/__init__.py +5 -3
- fusion_bench/taskpool/clip_vision/__init__.py +1 -0
- fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
- fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
- fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
- fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
- fusion_bench/taskpool/gpt2_text_classification.py +30 -1
- fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
- fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
- fusion_bench/utils/data.py +12 -0
- fusion_bench/utils/devices.py +14 -0
- fusion_bench/utils/instantiate.py +12 -0
- fusion_bench/utils/misc.py +9 -2
- fusion_bench/utils/packages.py +14 -0
- fusion_bench/utils/parameters.py +1 -1
- fusion_bench/utils/tensorboard.py +1 -1
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.13.dist-info}/METADATA +1 -1
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.13.dist-info}/RECORD +190 -151
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL +1 -1
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
- fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
- fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
- fusion_bench_config/fabric/auto.yaml +0 -1
- fusion_bench_config/fabric/llama_ddp.yaml +0 -1
- fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric_model_fusion.yaml +0 -1
- fusion_bench_config/llama_full_finetune.yaml +0 -2
- fusion_bench_config/llama_model_fusion.yaml +0 -2
- fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
- fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
- fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
- fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
- fusion_bench_config/method/adamerging.yaml +2 -2
- fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
- fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
- fusion_bench_config/method/dare/simple_average.yaml +0 -1
- fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
- fusion_bench_config/method/dare/ties_merging.yaml +0 -2
- fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
- fusion_bench_config/method/doge_ta/doge_ta.yaml +1 -1
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
- fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
- fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
- fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
- fusion_bench_config/method/linear/llama_expo.yaml +0 -3
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
- fusion_bench_config/method/linear/weighted_average.yaml +0 -1
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
- fusion_bench_config/method/model_recombination.yaml +0 -1
- fusion_bench_config/method/opcm/opcm.yaml +0 -1
- fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
- fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
- fusion_bench_config/method/opcm/weight_average.yaml +0 -1
- fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
- fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
- fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
- fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
- fusion_bench_config/method/slerp/slerp.yaml +0 -2
- fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
- fusion_bench_config/method/task_arithmetic.yaml +1 -1
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
- fusion_bench_config/method/ties_merging.yaml +1 -1
- fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
- fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
- fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +1 -3
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
- fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
- fusion_bench_config/nyuv2_config.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
- fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.13.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.13.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.12.dist-info → fusion_bench-0.2.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# OpenCLIPVisionModelPool
|
|
2
|
+
|
|
3
|
+
This is a model pool for OpenCLIP Vision models.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
By default, the model checkpoints are placed in the `.cache/task_vectors_checkpoints` directory.
|
|
8
|
+
|
|
9
|
+
```
|
|
10
|
+
.cache/
|
|
11
|
+
├── task_vectors_checkpoints/
|
|
12
|
+
│ ├── ViT-B-16
|
|
13
|
+
│ │ ├── Cars/finetuned.pt
|
|
14
|
+
│ │ ├── DTD/finetuned.pt
|
|
15
|
+
│ │ ├── ...
|
|
16
|
+
│ ├── ViT-B-32
|
|
17
|
+
│ │ ├── Cars/finetuned.pt
|
|
18
|
+
│ │ ├── DTD/finetuned.pt
|
|
19
|
+
│ │ ├── ...
|
|
20
|
+
│ ├── ...
|
|
21
|
+
│ ├── head_Cars.pt
|
|
22
|
+
│ ├── head_DTD.pt
|
|
23
|
+
│ ├── ...
|
|
24
|
+
│ └── zeroshot.pt
|
|
25
|
+
└── ...
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Model Configuration
|
|
29
|
+
|
|
30
|
+
The model pool supports several formats for model configuration:
|
|
31
|
+
|
|
32
|
+
1. **Direct Path (String)**:
|
|
33
|
+
- A string path to a model checkpoint in pickle format
|
|
34
|
+
- Example: `"path/to/model.pt"`
|
|
35
|
+
|
|
36
|
+
2. **Pickle Path Configuration**:
|
|
37
|
+
```yaml
|
|
38
|
+
model_name: "ViT-B-16" # Name of the model
|
|
39
|
+
pickle_path: "path/to/model.pt" # Path to pickle file
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
3. **State Dict Configuration**:
|
|
43
|
+
```yaml
|
|
44
|
+
model_name: "ViT-B-16" # Name of the model
|
|
45
|
+
state_dict_path: "path/to/state_dict.pt" # Path to state dict file
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
4. **Hydra Configuration**:
|
|
49
|
+
- Any configuration that can be instantiated using Hydra's `instantiate`
|
|
50
|
+
|
|
51
|
+
## Classification Head Configuration
|
|
52
|
+
|
|
53
|
+
The classification heads can be configured in two ways:
|
|
54
|
+
|
|
55
|
+
1. **Direct Path (String)**:
|
|
56
|
+
- A string path to a classification head checkpoint in pickle format
|
|
57
|
+
- Example: `"path/to/head.pt"`
|
|
58
|
+
|
|
59
|
+
2. **Hydra Configuration**:
|
|
60
|
+
- Any configuration that can be instantiated using Hydra's `instantiate`
|
|
61
|
+
|
|
62
|
+
## Dataset Configuration
|
|
63
|
+
|
|
64
|
+
The model pool supports loading datasets in two ways:
|
|
65
|
+
|
|
66
|
+
1. **Direct Dataset Name (String)**:
|
|
67
|
+
- A string identifier that can be loaded using `datasets.load_dataset`
|
|
68
|
+
- Example: `"cifar10"`
|
|
69
|
+
|
|
70
|
+
2. **Custom Configuration**:
|
|
71
|
+
- Any custom dataset configuration that can be handled by the parent class
|
|
72
|
+
|
|
73
|
+
## Example Configuration
|
|
74
|
+
|
|
75
|
+
Here's an example of a complete configuration:
|
|
76
|
+
|
|
77
|
+
```yaml
|
|
78
|
+
models:
|
|
79
|
+
vit_b16:
|
|
80
|
+
model_name: "ViT-B-16"
|
|
81
|
+
pickle_path: ".cache/task_vectors_checkpoints/ViT-B-16/Cars/finetuned.pt"
|
|
82
|
+
vit_b32:
|
|
83
|
+
model_name: "ViT-B-32"
|
|
84
|
+
state_dict_path: ".cache/task_vectors_checkpoints/ViT-B-32/DTD/finetuned.pt"
|
|
85
|
+
|
|
86
|
+
classification_heads:
|
|
87
|
+
cars_head: ".cache/task_vectors_checkpoints/head_Cars.pt"
|
|
88
|
+
dtd_head: ".cache/task_vectors_checkpoints/head_DTD.pt"
|
|
89
|
+
```
|
|
90
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/train@train_datasets: the_eight_tasks
|
|
3
|
+
- /dataset/image_classification/test@test_datasets: the_eight_tasks
|
|
4
|
+
- _self_
|
|
5
|
+
_target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
|
|
6
|
+
_recursive_: false
|
|
7
|
+
# path of the checkpoint directory
|
|
8
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
9
|
+
models:
|
|
10
|
+
_pretrained_: ${..model_dir}/ViT-B-16/zeroshot.pt
|
|
11
|
+
sun397: ${..model_dir}/ViT-B-16/SUN397/finetuned.pt
|
|
12
|
+
stanford-cars: ${..model_dir}/ViT-B-16/Cars/finetuned.pt
|
|
13
|
+
resisc45: ${..model_dir}/ViT-B-16/RESISC45/finetuned.pt
|
|
14
|
+
eurosat: ${..model_dir}/ViT-B-16/EuroSAT/finetuned.pt
|
|
15
|
+
svhn: ${..model_dir}/ViT-B-16/SVHN/finetuned.pt
|
|
16
|
+
gtsrb: ${..model_dir}/ViT-B-16/GTSRB/finetuned.pt
|
|
17
|
+
mnist: ${..model_dir}/ViT-B-16/MNIST/finetuned.pt
|
|
18
|
+
dtd: ${..model_dir}/ViT-B-16/DTD/finetuned.pt
|
|
19
|
+
classification_heads:
|
|
20
|
+
sun397: ${..model_dir}/ViT-B-16/head_SUN397.pt
|
|
21
|
+
stanford-cars: ${..model_dir}/ViT-B-16/head_Cars.pt
|
|
22
|
+
resisc45: ${..model_dir}/ViT-B-16/head_RESISC45.pt
|
|
23
|
+
eurosat: ${..model_dir}/ViT-B-16/head_EuroSAT.pt
|
|
24
|
+
svhn: ${..model_dir}/ViT-B-16/head_SVHN.pt
|
|
25
|
+
gtsrb: ${..model_dir}/ViT-B-16/head_GTSRB.pt
|
|
26
|
+
mnist: ${..model_dir}/ViT-B-16/head_MNIST.pt
|
|
27
|
+
dtd: ${..model_dir}/ViT-B-16/head_DTD.pt
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/train@train_datasets: the_eight_tasks
|
|
3
|
+
- /dataset/image_classification/test@test_datasets: the_eight_tasks
|
|
4
|
+
- _self_
|
|
5
|
+
_target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
|
|
6
|
+
_recursive_: false
|
|
7
|
+
# path of the checkpoint directory
|
|
8
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
9
|
+
models:
|
|
10
|
+
_pretrained_:
|
|
11
|
+
model_name: ViT-B-32
|
|
12
|
+
pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
|
|
13
|
+
sun397:
|
|
14
|
+
model_name: ViT-B-32
|
|
15
|
+
pickle_path: ${...model_dir}/ViT-B-32/SUN397/finetuned.pt
|
|
16
|
+
stanford-cars:
|
|
17
|
+
model_name: ViT-B-32
|
|
18
|
+
pickle_path: ${...model_dir}/ViT-B-32/Cars/finetuned.pt
|
|
19
|
+
resisc45:
|
|
20
|
+
model_name: ViT-B-32
|
|
21
|
+
pickle_path: ${...model_dir}/ViT-B-32/RESISC45/finetuned.pt
|
|
22
|
+
eurosat:
|
|
23
|
+
model_name: ViT-B-32
|
|
24
|
+
pickle_path: ${...model_dir}/ViT-B-32/EuroSAT/finetuned.pt
|
|
25
|
+
svhn:
|
|
26
|
+
model_name: ViT-B-32
|
|
27
|
+
pickle_path: ${...model_dir}/ViT-B-32/SVHN/finetuned.pt
|
|
28
|
+
gtsrb:
|
|
29
|
+
model_name: ViT-B-32
|
|
30
|
+
pickle_path: ${...model_dir}/ViT-B-32/GTSRB/finetuned.pt
|
|
31
|
+
mnist:
|
|
32
|
+
model_name: ViT-B-32
|
|
33
|
+
pickle_path: ${...model_dir}/ViT-B-32/MNIST/finetuned.pt
|
|
34
|
+
dtd:
|
|
35
|
+
model_name: ViT-B-32
|
|
36
|
+
pickle_path: ${...model_dir}/ViT-B-32/DTD/finetuned.pt
|
|
37
|
+
classification_heads:
|
|
38
|
+
sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
|
|
39
|
+
stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
|
|
40
|
+
resisc45: ${..model_dir}/ViT-B-32/head_RESISC45.pt
|
|
41
|
+
eurosat: ${..model_dir}/ViT-B-32/head_EuroSAT.pt
|
|
42
|
+
svhn: ${..model_dir}/ViT-B-32/head_SVHN.pt
|
|
43
|
+
gtsrb: ${..model_dir}/ViT-B-32/head_GTSRB.pt
|
|
44
|
+
mnist: ${..model_dir}/ViT-B-32/head_MNIST.pt
|
|
45
|
+
dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/train@train_datasets:
|
|
3
|
+
- stanford-cars
|
|
4
|
+
- dtd
|
|
5
|
+
- /dataset/image_classification/test@test_datasets:
|
|
6
|
+
- stanford-cars
|
|
7
|
+
- dtd
|
|
8
|
+
_target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
|
|
9
|
+
_recursive_: false
|
|
10
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
11
|
+
models:
|
|
12
|
+
_pretrained_:
|
|
13
|
+
model_name: ViT-B-32
|
|
14
|
+
pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
|
|
15
|
+
stanford-cars:
|
|
16
|
+
model_name: ViT-B-32
|
|
17
|
+
pickle_path: ${...model_dir}/ViT-B-32/Cars/finetuned.pt
|
|
18
|
+
dtd:
|
|
19
|
+
model_name: ViT-B-32
|
|
20
|
+
pickle_path: ${...model_dir}/ViT-B-32/DTD/finetuned.pt
|
|
21
|
+
classification_heads:
|
|
22
|
+
stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
|
|
23
|
+
dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/train@train_datasets:
|
|
3
|
+
- sun397
|
|
4
|
+
- stanford-cars
|
|
5
|
+
- /dataset/image_classification/test@test_datasets:
|
|
6
|
+
- sun397
|
|
7
|
+
- stanford-cars
|
|
8
|
+
_target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
|
|
9
|
+
_recursive_: false
|
|
10
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
11
|
+
models:
|
|
12
|
+
_pretrained_:
|
|
13
|
+
model_name: ViT-B-32
|
|
14
|
+
pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
|
|
15
|
+
sun397:
|
|
16
|
+
model_name: ViT-B-32
|
|
17
|
+
pickle_path: ${...model_dir}/ViT-B-32/SUN397/finetuned.pt
|
|
18
|
+
stanford-cars:
|
|
19
|
+
model_name: ViT-B-32
|
|
20
|
+
pickle_path: ${...model_dir}/ViT-B-32/Cars/finetuned.pt
|
|
21
|
+
classification_heads:
|
|
22
|
+
sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
|
|
23
|
+
stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/train@train_datasets:
|
|
3
|
+
- sun397
|
|
4
|
+
- dtd
|
|
5
|
+
- /dataset/image_classification/test@test_datasets:
|
|
6
|
+
- sun397
|
|
7
|
+
- dtd
|
|
8
|
+
_target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
|
|
9
|
+
_recursive_: false
|
|
10
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
11
|
+
models:
|
|
12
|
+
_pretrained_:
|
|
13
|
+
model_name: ViT-B-32
|
|
14
|
+
pickle_path: ${...model_dir}/ViT-B-32/zeroshot.pt
|
|
15
|
+
sun397:
|
|
16
|
+
model_name: ViT-B-32
|
|
17
|
+
pickle_path: ${...model_dir}/ViT-B-32/SUN397/finetuned.pt
|
|
18
|
+
dtd:
|
|
19
|
+
model_name: ViT-B-32
|
|
20
|
+
pickle_path: ${...model_dir}/ViT-B-32/DTD/finetuned.pt
|
|
21
|
+
classification_heads:
|
|
22
|
+
sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
|
|
23
|
+
dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/train@train_datasets: the_eight_tasks
|
|
3
|
+
- /dataset/image_classification/test@test_datasets: the_eight_tasks
|
|
4
|
+
- _self_
|
|
5
|
+
_target_: fusion_bench.modelpool.OpenCLIPVisionModelPool
|
|
6
|
+
_recursive_: false
|
|
7
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
8
|
+
models:
|
|
9
|
+
_pretrained_: ${..model_dir}/ViT-L-14/zeroshot.pt
|
|
10
|
+
sun397: ${..model_dir}/ViT-L-14/SUN397/finetuned.pt
|
|
11
|
+
stanford-cars: ${..model_dir}/ViT-L-14/Cars/finetuned.pt
|
|
12
|
+
resisc45: ${..model_dir}/ViT-L-14/RESISC45/finetuned.pt
|
|
13
|
+
eurosat: ${..model_dir}/ViT-L-14/EuroSAT/finetuned.pt
|
|
14
|
+
svhn: ${..model_dir}/ViT-L-14/SVHN/finetuned.pt
|
|
15
|
+
gtsrb: ${..model_dir}/ViT-L-14/GTSRB/finetuned.pt
|
|
16
|
+
mnist: ${..model_dir}/ViT-L-14/MNIST/finetuned.pt
|
|
17
|
+
dtd: ${..model_dir}/ViT-L-14/DTD/finetuned.pt
|
|
18
|
+
classification_heads:
|
|
19
|
+
sun397: ${..model_dir}/ViT-L-14/head_SUN397.pt
|
|
20
|
+
stanford-cars: ${..model_dir}/ViT-L-14/head_Cars.pt
|
|
21
|
+
resisc45: ${..model_dir}/ViT-L-14/head_RESISC45.pt
|
|
22
|
+
eurosat: ${..model_dir}/ViT-L-14/head_EuroSAT.pt
|
|
23
|
+
svhn: ${..model_dir}/ViT-L-14/head_SVHN.pt
|
|
24
|
+
gtsrb: ${..model_dir}/ViT-L-14/head_GTSRB.pt
|
|
25
|
+
mnist: ${..model_dir}/ViT-L-14/head_MNIST.pt
|
|
26
|
+
dtd: ${..model_dir}/ViT-L-14/head_DTD.pt
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
defaults:
|
|
2
2
|
- Seq2SeqLMPool@: _template
|
|
3
|
-
|
|
4
3
|
models:
|
|
5
4
|
_pretrained_:
|
|
6
5
|
_target_: transformers.AutoModelForSeq2SeqLM.from_pretrained
|
|
@@ -37,5 +36,4 @@ models:
|
|
|
37
36
|
_target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model
|
|
38
37
|
base_model_path: ${...base_model}
|
|
39
38
|
peft_model_path: tanganke/flan-t5-base_glue-stsb_lora-16
|
|
40
|
-
|
|
41
39
|
base_model: google/flan-t5-base
|
|
@@ -11,7 +11,6 @@ defaults:
|
|
|
11
11
|
- flan-t5-base_glue-stsb_lora-16
|
|
12
12
|
_target_: fusion_bench.modelpool.Seq2SeqLMPool
|
|
13
13
|
_recursive_: false
|
|
14
|
-
|
|
15
14
|
_dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset
|
|
16
15
|
test_datasets:
|
|
17
16
|
glue-cola:
|
|
@@ -62,7 +61,6 @@ test_datasets:
|
|
|
62
61
|
name: stsb
|
|
63
62
|
tokenizer: ${...tokenizer}
|
|
64
63
|
split: validation
|
|
65
|
-
|
|
66
64
|
tokenizer:
|
|
67
65
|
_target_: transformers.AutoTokenizer.from_pretrained
|
|
68
66
|
pretrained_model_name_or_path: google/flan-t5-base
|
|
@@ -11,7 +11,6 @@ defaults:
|
|
|
11
11
|
- flan-t5-base_glue-stsb
|
|
12
12
|
_target_: fusion_bench.modelpool.Seq2SeqLMPool
|
|
13
13
|
_recursive_: false
|
|
14
|
-
|
|
15
14
|
_dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset
|
|
16
15
|
test_datasets:
|
|
17
16
|
glue-cola:
|
|
@@ -62,7 +61,6 @@ test_datasets:
|
|
|
62
61
|
name: stsb
|
|
63
62
|
tokenizer: ${...tokenizer}
|
|
64
63
|
split: validation
|
|
65
|
-
|
|
66
64
|
tokenizer:
|
|
67
65
|
_target_: transformers.AutoTokenizer.from_pretrained
|
|
68
|
-
pretrained_model_name_or_path: google/flan-t5-base
|
|
66
|
+
pretrained_model_name_or_path: google/flan-t5-base
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
defaults:
|
|
2
2
|
- Seq2SeqLMPool@: _template
|
|
3
|
-
|
|
4
3
|
models:
|
|
5
4
|
_pretrained_:
|
|
6
5
|
_target_: transformers.AutoModelForSeq2SeqLM.from_pretrained
|
|
@@ -37,9 +36,7 @@ models:
|
|
|
37
36
|
_target_: fusion_bench.modelpool.seq2seq_lm.modelpool.load_lora_model
|
|
38
37
|
base_model_path: ${...base_model}
|
|
39
38
|
peft_model_path: tanganke/flan-t5-large_glue-stsb_lora-16
|
|
40
|
-
|
|
41
39
|
tokenizer:
|
|
42
40
|
_target_: transformers.AutoTokenizer.from_pretrained
|
|
43
41
|
pretrained_model_name_or_path: ${..base_model}
|
|
44
|
-
|
|
45
42
|
base_model: google/flan-t5-large
|
|
@@ -1,19 +1,15 @@
|
|
|
1
1
|
_target_: fusion_bench.modelpool.SeqenceClassificationModelPool
|
|
2
|
-
|
|
3
2
|
pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
|
|
4
|
-
|
|
5
3
|
models:
|
|
6
4
|
_pretrained_:
|
|
7
5
|
_target_: fusion_bench.modelpool.seq_classification_lm.create_reward_model_from_pretrained
|
|
8
6
|
pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
|
|
9
7
|
torch_dtype: bfloat16
|
|
10
8
|
use_flash_attention_2: true
|
|
11
|
-
|
|
12
9
|
tokenizer:
|
|
13
10
|
_target_: transformers.AutoTokenizer.from_pretrained
|
|
14
11
|
pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
|
|
15
12
|
pad_token: <|end_of_text|> # do not use the EOS token (<|eos_id|>) as the padding token, because it already marks the end of each content segment
|
|
16
|
-
|
|
17
13
|
train_datasets:
|
|
18
14
|
preference_700k:
|
|
19
15
|
_target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
_target_: fusion_bench.modelpool.SeqenceClassificationModelPool
|
|
2
|
-
|
|
3
2
|
pretrained_model_name_or_path: fusion-bench/Llama-3.2-1B-Instruct_Bradly-Terry-RM_Preference-700k
|
|
4
|
-
|
|
5
3
|
models:
|
|
6
4
|
_pretrained_:
|
|
7
5
|
_target_: transformers.AutoModelForSequenceClassification.from_pretrained
|
|
8
6
|
pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
|
|
9
7
|
torch_dtype: bfloat16
|
|
10
|
-
|
|
11
8
|
tokenizer:
|
|
12
9
|
_target_: transformers.AutoTokenizer.from_pretrained
|
|
13
10
|
pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
_target_: fusion_bench.modelpool.HuggingFaceGPT2ClassificationPool
|
|
2
|
-
|
|
3
2
|
_model_loader: transformers.GPT2Model.from_pretrained
|
|
4
3
|
models:
|
|
5
4
|
_pretrained_:
|
|
@@ -26,7 +25,6 @@ models:
|
|
|
26
25
|
sst2:
|
|
27
26
|
_target_: ${..._model_loader}
|
|
28
27
|
pretrained_model_name_or_path: tanganke/gpt2_sst2
|
|
29
|
-
|
|
30
28
|
# train datasets for RegMean, Fisher Merging ...
|
|
31
29
|
_dataset_loader: fusion_bench.modelpool.huggingface_gpt2_classification.load_gpt2_dataset
|
|
32
30
|
train_datasets:
|
|
@@ -58,7 +56,6 @@ train_datasets:
|
|
|
58
56
|
_target_: ${..._dataset_loader}
|
|
59
57
|
name: sst2
|
|
60
58
|
split: train
|
|
61
|
-
|
|
62
59
|
tokenizer:
|
|
63
60
|
_target_: fusion_bench.modelpool.huggingface_gpt2_classification.load_gpt2_tokenizer
|
|
64
61
|
pretrained_model_name_or_path: gpt2
|
|
@@ -5,10 +5,8 @@ defaults:
|
|
|
5
5
|
- method: simple_average
|
|
6
6
|
- taskpool: nyuv2_taskpool
|
|
7
7
|
- _self_
|
|
8
|
-
|
|
9
8
|
_target_: fusion_bench.programs.FabricModelFusionProgram
|
|
10
9
|
_recursive_: false
|
|
11
|
-
|
|
12
10
|
fast_dev_run: false # Run a single batch of data to test the model or method
|
|
13
11
|
use_lightning: true # Use the fabric to run the experiment
|
|
14
12
|
print_config: true # Print the configuration to the console
|
|
@@ -2,10 +2,8 @@
|
|
|
2
2
|
#
|
|
3
3
|
# defaults:
|
|
4
4
|
# - CLIPVisionModelTaskPool@: _template
|
|
5
|
-
|
|
6
5
|
_target_: fusion_bench.taskpool.CLIPVisionModelTaskPool
|
|
7
6
|
_recursive_: false
|
|
8
|
-
|
|
9
7
|
test_datasets: ??? # The datasets to evaluate the model on
|
|
10
8
|
base_model: openai/clip-vit-base-patch32
|
|
11
9
|
clip_model:
|
|
@@ -21,7 +19,6 @@ dataloader_kwargs:
|
|
|
21
19
|
pin_memory: True # Whether to pin memory in data loader
|
|
22
20
|
drop_last: False # Whether to drop the last incomplete batch
|
|
23
21
|
shuffle: False # Whether to shuffle the data
|
|
24
|
-
|
|
25
22
|
# === layer-wise feature saving ===
|
|
26
23
|
# The path to save the features to, if none then the features are not saved
|
|
27
24
|
# This is the path to a directory; the features of task `task_name` will be saved in `feature_save_path/task_name.csv`
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/test@test_datasets: the_eight_tasks
|
|
3
|
+
- _self_
|
|
4
|
+
_target_: fusion_bench.taskpool.OpenCLIPVisionModelTaskPool
|
|
5
|
+
_recursive_: false
|
|
6
|
+
# name of the base model
|
|
7
|
+
model_name: ViT-B-16
|
|
8
|
+
# path of the checkpoint directory
|
|
9
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
10
|
+
classification_heads:
|
|
11
|
+
sun397: ${..model_dir}/ViT-B-16/head_SUN397.pt
|
|
12
|
+
stanford-cars: ${..model_dir}/ViT-B-16/head_Cars.pt
|
|
13
|
+
resisc45: ${..model_dir}/ViT-B-16/head_RESISC45.pt
|
|
14
|
+
eurosat: ${..model_dir}/ViT-B-16/head_EuroSAT.pt
|
|
15
|
+
svhn: ${..model_dir}/ViT-B-16/head_SVHN.pt
|
|
16
|
+
gtsrb: ${..model_dir}/ViT-B-16/head_GTSRB.pt
|
|
17
|
+
mnist: ${..model_dir}/ViT-B-16/head_MNIST.pt
|
|
18
|
+
dtd: ${..model_dir}/ViT-B-16/head_DTD.pt
|
|
19
|
+
dataloader_kwargs:
|
|
20
|
+
batch_size: 128 # The batch size for the data loader
|
|
21
|
+
num_workers: 8 # The number of worker processes for data loading
|
|
22
|
+
pin_memory: True # Whether to pin memory in data loader
|
|
23
|
+
drop_last: False # Whether to drop the last incomplete batch
|
|
24
|
+
shuffle: False # Whether to shuffle the data
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/test@test_datasets: the_eight_tasks
|
|
3
|
+
- _self_
|
|
4
|
+
_target_: fusion_bench.taskpool.OpenCLIPVisionModelTaskPool
|
|
5
|
+
_recursive_: false
|
|
6
|
+
# name of the base model
|
|
7
|
+
model_name: ViT-B-32
|
|
8
|
+
# path of the checkpoint directory
|
|
9
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
10
|
+
classification_heads:
|
|
11
|
+
sun397: ${..model_dir}/ViT-B-32/head_SUN397.pt
|
|
12
|
+
stanford-cars: ${..model_dir}/ViT-B-32/head_Cars.pt
|
|
13
|
+
resisc45: ${..model_dir}/ViT-B-32/head_RESISC45.pt
|
|
14
|
+
eurosat: ${..model_dir}/ViT-B-32/head_EuroSAT.pt
|
|
15
|
+
svhn: ${..model_dir}/ViT-B-32/head_SVHN.pt
|
|
16
|
+
gtsrb: ${..model_dir}/ViT-B-32/head_GTSRB.pt
|
|
17
|
+
mnist: ${..model_dir}/ViT-B-32/head_MNIST.pt
|
|
18
|
+
dtd: ${..model_dir}/ViT-B-32/head_DTD.pt
|
|
19
|
+
dataloader_kwargs:
|
|
20
|
+
batch_size: 128 # The batch size for the data loader
|
|
21
|
+
num_workers: 8 # The number of worker processes for data loading
|
|
22
|
+
pin_memory: True # Whether to pin memory in data loader
|
|
23
|
+
drop_last: False # Whether to drop the last incomplete batch
|
|
24
|
+
shuffle: False # Whether to shuffle the data
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- /dataset/image_classification/test@test_datasets: the_eight_tasks
|
|
3
|
+
- _self_
|
|
4
|
+
_target_: fusion_bench.taskpool.OpenCLIPVisionModelTaskPool
|
|
5
|
+
_recursive_: false
|
|
6
|
+
# name of the base model
|
|
7
|
+
model_name: ViT-L-14
|
|
8
|
+
# path of the checkpoint directory
|
|
9
|
+
model_dir: ./.cache/task_vectors_checkpoints/
|
|
10
|
+
classification_heads:
|
|
11
|
+
sun397: ${..model_dir}/ViT-L-14/head_SUN397.pt
|
|
12
|
+
stanford-cars: ${..model_dir}/ViT-L-14/head_Cars.pt
|
|
13
|
+
resisc45: ${..model_dir}/ViT-L-14/head_RESISC45.pt
|
|
14
|
+
eurosat: ${..model_dir}/ViT-L-14/head_EuroSAT.pt
|
|
15
|
+
svhn: ${..model_dir}/ViT-L-14/head_SVHN.pt
|
|
16
|
+
gtsrb: ${..model_dir}/ViT-L-14/head_GTSRB.pt
|
|
17
|
+
mnist: ${..model_dir}/ViT-L-14/head_MNIST.pt
|
|
18
|
+
dtd: ${..model_dir}/ViT-L-14/head_DTD.pt
|
|
19
|
+
dataloader_kwargs:
|
|
20
|
+
batch_size: 128 # The batch size for the data loader
|
|
21
|
+
num_workers: 8 # The number of worker processes for data loading
|
|
22
|
+
pin_memory: True # Whether to pin memory in data loader
|
|
23
|
+
drop_last: False # Whether to drop the last incomplete batch
|
|
24
|
+
shuffle: False # Whether to shuffle the data
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
_target_: fusion_bench.taskpool.llama.reward_model.RewardModelEvaluationTaskPool
|
|
2
|
-
|
|
3
2
|
test_datasets:
|
|
4
3
|
preference_700k:
|
|
5
4
|
_target_: fusion_bench.dataset.llama.preference_700k.load_tokenized_preference_700k_for_rlhf
|
|
@@ -7,12 +6,9 @@ test_datasets:
|
|
|
7
6
|
path: hendrydong/preference_700K
|
|
8
7
|
split: train
|
|
9
8
|
cache_path: null
|
|
10
|
-
|
|
11
9
|
dataloader_kwargs:
|
|
12
10
|
shuffle: False
|
|
13
11
|
batch_size: 16
|
|
14
|
-
|
|
15
12
|
tokenizer: ${..modelpool.tokenizer}
|
|
16
|
-
|
|
17
13
|
max_num_samples: 1000
|
|
18
14
|
seed: 42
|
|
File without changes
|
|
File without changes
|
|
File without changes
|