fusion-bench 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/compat/method/__init__.py +3 -1
- fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
- fusion_bench/constants/clip_vision.py +22 -0
- fusion_bench/dataset/clip_dataset.py +10 -2
- fusion_bench/dataset/gsm8k.py +2 -2
- fusion_bench/method/__init__.py +12 -2
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
- fusion_bench/method/doge_ta/__init__.py +2 -0
- fusion_bench/method/{DOGE_TA → doge_ta}/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/{DOGE_TA/DOGE_TA.py → doge_ta/doge_ta.py} +1 -1
- fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
- fusion_bench/method/gossip/__init__.py +3 -0
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
- fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
- fusion_bench/method/gossip/entropy_loss.py +25 -0
- fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
- fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
- fusion_bench/method/gossip/min_norm_solvers.py +227 -0
- fusion_bench/method/gossip/task_wise_gossip.py +265 -0
- fusion_bench/method/gossip/utils.py +74 -0
- fusion_bench/method/isotropic_merging/__init__.py +1 -1
- fusion_bench/method/opcm/opcm.py +102 -84
- fusion_bench/method/opcm/task_arithmetic.py +35 -21
- fusion_bench/method/opcm/ties_merging.py +71 -52
- fusion_bench/method/pwe_moe/module.py +1 -1
- fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
- fusion_bench/method/regmean/regmean.py +25 -17
- fusion_bench/method/smile_upscaling/__init__.py +1 -1
- fusion_bench/method/smile_upscaling/smile_upscaling.py +13 -10
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
- fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
- fusion_bench/method/ties_merging/ties_merging.py +36 -31
- fusion_bench/method/we_moe/we_moe.py +14 -15
- fusion_bench/mixins/__init__.py +6 -3
- fusion_bench/mixins/hydra_config.py +49 -0
- fusion_bench/mixins/openclip_classification.py +11 -0
- fusion_bench/mixins/simple_profiler.py +4 -2
- fusion_bench/modelpool/__init__.py +3 -1
- fusion_bench/modelpool/base_pool.py +2 -2
- fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
- fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
- fusion_bench/models/open_clip/__init__.py +6 -0
- fusion_bench/models/open_clip/modeling.py +176 -0
- fusion_bench/models/open_clip/utils.py +311 -0
- fusion_bench/models/open_clip/variables_and_paths.py +56 -0
- fusion_bench/models/parameter_dict.py +54 -13
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -46
- fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py +4 -119
- fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
- fusion_bench/taskpool/__init__.py +5 -3
- fusion_bench/taskpool/clip_vision/__init__.py +1 -0
- fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
- fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
- fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
- fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
- fusion_bench/taskpool/gpt2_text_classification.py +30 -1
- fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
- fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
- fusion_bench/utils/data.py +12 -0
- fusion_bench/utils/devices.py +14 -0
- fusion_bench/utils/instantiate.py +12 -0
- fusion_bench/utils/misc.py +9 -2
- fusion_bench/utils/packages.py +14 -0
- fusion_bench/utils/parameters.py +1 -1
- fusion_bench/utils/tensorboard.py +1 -1
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/METADATA +15 -2
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/RECORD +198 -158
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL +1 -1
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
- fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
- fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
- fusion_bench_config/fabric/auto.yaml +0 -1
- fusion_bench_config/fabric/llama_ddp.yaml +0 -1
- fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric_model_fusion.yaml +0 -1
- fusion_bench_config/llama_full_finetune.yaml +0 -2
- fusion_bench_config/llama_model_fusion.yaml +0 -2
- fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
- fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
- fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
- fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
- fusion_bench_config/method/adamerging.yaml +2 -2
- fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
- fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
- fusion_bench_config/method/dare/simple_average.yaml +0 -1
- fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
- fusion_bench_config/method/dare/ties_merging.yaml +0 -2
- fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
- fusion_bench_config/method/{DOGE_TA/DOGE_TA.yaml → doge_ta/doge_ta.yaml} +1 -1
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
- fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
- fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
- fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
- fusion_bench_config/method/linear/llama_expo.yaml +0 -3
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
- fusion_bench_config/method/linear/weighted_average.yaml +0 -1
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
- fusion_bench_config/method/model_recombination.yaml +0 -1
- fusion_bench_config/method/opcm/opcm.yaml +0 -1
- fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
- fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
- fusion_bench_config/method/opcm/weight_average.yaml +0 -1
- fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
- fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
- fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
- fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
- fusion_bench_config/method/slerp/slerp.yaml +0 -2
- fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
- fusion_bench_config/method/task_arithmetic.yaml +1 -1
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
- fusion_bench_config/method/ties_merging.yaml +1 -1
- fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
- fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
- fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +8 -10
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +66 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
- fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
- fusion_bench_config/nyuv2_config.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
- fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
- fusion_bench/method/DOGE_TA/__init__.py +0 -2
- /fusion_bench/method/{DOGE_TA → doge_ta}/layer_wise_adamerging.py +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info/licenses}/LICENSE +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/top_level.txt +0 -0
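Most of the YAML files listed above follow Hydra's `_target_` convention, so a loaded config resolves directly to a fusion_bench object. A hypothetical sketch, assuming the package and its config tree are available locally:

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Load one of the method configs listed above and build the corresponding object.
# The exact constructor arguments come from the YAML keys next to `_target_`.
cfg = OmegaConf.load("fusion_bench_config/method/task_arithmetic.yaml")
algorithm = instantiate(cfg)  # e.g. fusion_bench.method.TaskArithmeticAlgorithm(scaling_factor=0.3)
```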
```diff
@@ -1,12 +1,10 @@
 _target_: fusion_bench.method.FullFinetuneSFT
 _recursive_: False
-
 optimizer:
   _target_: torch.optim.AdamW
   lr: 1e-5
   weight_decay: 0.01
   fused: null
-
 lr_scheduler:
   _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
   T_max: _T_max_ # this will be replaced by the expected number of training steps
@@ -14,13 +12,11 @@ lr_scheduler:
   warmup_steps: 100
   max_lr: ${..optimizer.lr}
   min_lr: 1e-6
-
 dataloader_kwargs:
   # per-gpu batch size
   batch_size: 1
   num_workers: 0
   pin_memory: True
-
 # Training hyperparameters
 # if max_epochs=-1, max_steps will be used to determine the number of training steps
 max_epochs: 3
```
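`max_lr: ${..optimizer.lr}` is an OmegaConf relative interpolation: from inside `lr_scheduler`, `..` walks up to the config root, so the scheduler reuses the optimizer's learning rate. A small illustration:

```python
from omegaconf import OmegaConf

# Minimal reproduction of the interpolation used in the config above.
cfg = OmegaConf.create(
    {
        "optimizer": {"lr": 1e-5},
        "lr_scheduler": {"max_lr": "${..optimizer.lr}", "min_lr": 1e-6},
    }
)
print(cfg.lr_scheduler.max_lr)  # 1e-05, resolved from optimizer.lr
```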
```diff
@@ -1,23 +1,19 @@
 _target_: fusion_bench.method.PeftFinetuneSFT
 _recursive_: False
-
 optimizer:
   _target_: torch.optim.AdamW
   lr: 1e-4
   weight_decay: 0.01
   fused: null
-
 lr_scheduler:
   _target_: torch.optim.lr_scheduler.CosineAnnealingLR
   T_max: _T_max_ # this will be replaced by the expected number of training steps
   eta_min: 1e-6
-
 dataloader_kwargs:
   # per-gpu batch size
   batch_size: 1
   num_workers: 0
   pin_memory: True
-
 peft_config:
   _target_: peft.LoraConfig
   task_type: peft.TaskType.CAUSAL_LM
@@ -33,11 +29,9 @@ peft_config:
   lora_alpha: 16
   lora_dropout: 0
   bias: none
-
 adapter_name: default
 # whether to merge and unload the adapter after training
 merge_and_unload: false
-
 # Training hyperparameters
 # if max_epochs=-1, max_steps will be used to determine the number of training steps
 max_epochs: 3
```
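The `peft_config` block above is instantiated into a peft `LoraConfig`. A minimal equivalent in Python, mirroring only the values shown here (other LoRA fields such as `r` and `target_modules` are set elsewhere in the file):

```python
from peft import LoraConfig, TaskType

# Mirrors the peft_config block above; remaining fields keep peft's defaults.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
)
```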
```diff
@@ -1,5 +1,4 @@
 _target_: fusion_bench.method.opcm.ties_merging.ContinualTiesMergingForCLIP
-
 # Scaling factor $\lambda$
 scaling_factor: 0.5
 threshold: 20
@@ -7,7 +6,6 @@ threshold: 20
 remove_keys: []
 # Function to merge the models, default is sum. Options are 'sum', 'mean', and 'max'
 merge_func: sum
-
 # shuffle the order of the models
 shuffle_order: true
 # the random seed to use
```
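`threshold: 20` is the TIES-Merging trim percentage: only the top 20% of task-vector entries by magnitude are kept before sign election and merging. A rough sketch of that trim step (illustration only, not the package's implementation):

```python
import torch

def trim_task_vector(tv: torch.Tensor, keep_percent: float = 20.0) -> torch.Tensor:
    """Zero out all but the largest-magnitude `keep_percent`% of entries (TIES 'trim' step)."""
    k = max(1, int(tv.numel() * keep_percent / 100))
    # k-th largest absolute value serves as the keep/drop cutoff
    cutoff = tv.abs().flatten().kthvalue(tv.numel() - k + 1).values
    return torch.where(tv.abs() >= cutoff, tv, torch.zeros_like(tv))
```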
```diff
@@ -0,0 +1,30 @@
+_target_: fusion_bench.method.pwe_moe.openclip_pwe_moe.PWEMoEExactParetoOptimalForOpenCLIP
+#! === Model Architecture Arguments ===
+# if true, then we only apply the weight ensembling MoE to MLPs, else, we apply it to all layers
+partial: true
+# weight-ensembling MoE arguments
+# initial outputs for the routing gates and the merging weights for the remaining layers
+init_lambda: 0.3
+# number of hidden layers in the routing gate
+router_hidden_layers: 2
+# path to the checkpoint file, if not provided, then the training is performed
+checkpoint_path: null
+#! === Training Arguments ===
+# if false, the training is skipped
+run_train: true
+num_steps: 2000
+save_interval: 1000
+# learning rate
+lr: 1e-2
+alpha: 1 # alpha for dirichlet, if alpha=1, then it is uniform
+# dataloader arguments
+dataloader_kwargs:
+  # per-device batch size
+  batch_size: 16
+  num_workers: 0
+#! === Evaluation Arguments ===
+# if false, the evaluation is skipped
+run_eval: false
+# if true, then we only evaluate the model on the first 20 batches of the test dataset
+quick_evaluation: false
+num_evaluation_samples: equal_weight
```
```diff
@@ -0,0 +1,30 @@
+_target_: fusion_bench.method.pwe_moe.openclip_pwe_moe.PWEMoELinearScalarizationForOpenCLIP
+#! === Model Architecture Arguments ===
+# if true, then we only apply the weight ensembling MoE to MLPs, else, we apply it to all layers
+partial: true
+# weight-ensembling MoE arguments
+# initial outputs for the routing gates and the merging weights for the remaining layers
+init_lambda: 0.3
+# number of hidden layers in the routing gate
+router_hidden_layers: 2
+# path to the checkpoint file, if not provided, then the training is performed
+checkpoint_path: null
+#! === Training Arguments ===
+# if false, the training is skipped
+run_train: true
+num_steps: 2000
+save_interval: 1000
+# learning rate
+lr: 1e-2
+alpha: 1 # alpha for dirichlet, if alpha=1, then it is uniform
+# dataloader arguments
+dataloader_kwargs:
+  # per-device batch size
+  batch_size: 16
+  num_workers: 0
+#! === Evaluation Arguments ===
+# if false, the evaluation is skipped
+run_eval: false
+# if true, then we only evaluate the model on the first 20 batches of the test dataset
+quick_evaluation: false
+num_evaluation_samples: equal_weight
```
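In both new configs, `alpha` parameterises the Dirichlet distribution from which task-preference weights are sampled during training; with `alpha: 1` the draw is uniform over the probability simplex. A small illustration:

```python
import torch

# Sample one preference vector over 8 task losses, as hinted by the `alpha` comment above.
num_tasks = 8
alpha = 1.0
preference = torch.distributions.Dirichlet(torch.full((num_tasks,), alpha)).sample()
print(preference, preference.sum())  # non-negative weights that sum to 1
```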
```diff
@@ -1,22 +1,23 @@
 _target_: fusion_bench.method.PWEMoELinearScalarizationForCLIP # or PWEMoExactParetoOptimalForCLIP
+#! === Model Architecture Arguments ===
 upscale_mlp: true
 upscale_attn: true
 # scaling factor for the remaining parameters
 init_lambda: 0.3
 router_hidden_layers: 2
+#! === Training Arguments ===
 lr: 1e-5
 num_steps: 8000
 save_interval: 2000
 alpha: 1 # alpha for dirichlet, if alpha=1, then it is uniform
 # load model from this checkpoint
+dataloader_kwargs:
+  # per-device batch size
+  batch_size: 16
+  num_workers: 4
 checkpoint_path: null
-
+#! === Evaluation Arguments ===
 # evaluation grid
 eval_grid: true
 eval_grid_n: 8
 eval_grid_m: 2
-
-dataloader_kwargs:
-  # per-device batch size
-  batch_size: 16
-  num_workers: 4
```
```diff
@@ -6,12 +6,10 @@ save_checkpoint: False
 router_hidden_layers: 1
 init_lambda: 0.3
 batch_reduce: true
-
 # device to compute svd
 svd_accelerator: cuda
 rank_k: 32 # How many experts are added to the pool per task?
-select_k: -1
-
+select_k: -1 # How many experts are selected from the pool to merge? Range is (1, rank_k*task_num). In particular -1: All the experts in the pool
 # learning rate
 lr: 1e-4
 optimizer: adam
```
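`rank_k` and `svd_accelerator` above suggest that each task vector is factorised with an SVD and its leading rank-one components are kept as experts. A rough sketch under that assumption (names are illustrative, not the package's API):

```python
import torch

def rank_one_experts(
    w_finetuned: torch.Tensor,
    w_pretrained: torch.Tensor,
    rank_k: int = 32,
    device: str = "cpu",  # the config uses `svd_accelerator: cuda`
) -> list[torch.Tensor]:
    """Keep the top-`rank_k` rank-one components of a 2-D task vector as experts."""
    delta = (w_finetuned - w_pretrained).to(device)
    u, s, vh = torch.linalg.svd(delta, full_matrices=False)
    return [s[i] * torch.outer(u[:, i], vh[i]) for i in range(min(rank_k, s.numel()))]
```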
```diff
@@ -1,2 +1,2 @@
 _target_: fusion_bench.method.TaskArithmeticAlgorithm
-scaling_factor: 0.
+scaling_factor: 0.3
```
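`scaling_factor` is the task-arithmetic coefficient $\lambda$ in $\theta_{\text{merged}} = \theta_{\text{pre}} + \lambda \sum_i (\theta_i - \theta_{\text{pre}})$; the default moves from 0. to 0.3. A minimal state-dict sketch of that update (illustration only, not the package's implementation):

```python
import torch

def task_arithmetic(pretrained: dict, finetuned: list[dict], scaling_factor: float = 0.3) -> dict:
    """Add the scaled sum of task vectors (fine-tuned minus pre-trained) to the base weights."""
    merged = {}
    for name, base in pretrained.items():
        task_vector_sum = sum(sd[name] - base for sd in finetuned)
        merged[name] = base + scaling_factor * task_vector_sum
    return merged
```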
```diff
@@ -1,32 +1,25 @@
 name: ??? # this can be sparse_clip_weight_ensembling_moe
-
 # the path for loading the model weights, if specified, skip the test-time adaptation training
 #checkpoint: /home/enneng/fusion_bench/outputs/sparse_we_moe/shared_gate/1routerlayer_clip-vit-base-patch32_TA8_sparse_weight_ensembling_moe_checkpoint_0_0.ckpt
 checkpoint: False
 # the path for saving the model weights.
 save_checkpoint: False
-
 # router
 router_hidden_layers: 2
 init_lambda: 0.3
 batch_reduce: true
-
 # sparse task vectors
 tv_prune_ratio: 0.9
-
 # sparse gate module
 post_sparse_gate: False
 gate_prune_ratio: 0.0
-
 # shared gate
 shared_gate: true
 position_encoding: false
 position_encoding_dim: 8
-
 # tta learning rate
 lr: 1e-4
 optimizer: adam
-
 # this is overrided by `fabric.devices` if launched from the `fusion_bench` CLI.
 devices: 1
 batch_size: 16
@@ -34,6 +27,5 @@ num_workers: 16
 max_steps: 1000
 # if true, we will use the gradient accumulation across tasks to save memory
 use_grad_accumulate: false
-
 cache_dir: outputs
 fast_dev_run: ${fast_dev_run}
```
```diff
@@ -1 +1 @@
-cifar10: tanganke/clip-vit-base-patch16_cifar10
+cifar10: tanganke/clip-vit-base-patch16_cifar10
@@ -1 +1 @@
-_pretrained_: openai/clip-vit-large-patch14
+_pretrained_: openai/clip-vit-large-patch14
@@ -1 +1 @@
-oxford-iiit-pet: tanganke/clip-vit-large-patch14_oxford-iiit-pet
+oxford-iiit-pet: tanganke/clip-vit-large-patch14_oxford-iiit-pet
@@ -1 +1 @@
-oxford_flowers102: tanganke/clip-vit-large-patch14_oxford_flowers102
+oxford_flowers102: tanganke/clip-vit-large-patch14_oxford_flowers102
@@ -1 +1 @@
-pcam: tanganke/clip-vit-large-patch14_pcam
+pcam: tanganke/clip-vit-large-patch14_pcam
@@ -1 +1 @@
-rendered-sst2: tanganke/clip-vit-large-patch14_rendered-sst2
+rendered-sst2: tanganke/clip-vit-large-patch14_rendered-sst2
@@ -1 +1 @@
-resisc45: tanganke/clip-vit-large-patch14_resisc45
+resisc45: tanganke/clip-vit-large-patch14_resisc45
@@ -1 +1 @@
-stanford-cars: tanganke/clip-vit-large-patch14_stanford-cars
+stanford-cars: tanganke/clip-vit-large-patch14_stanford-cars
@@ -1 +1 @@
-stl10: tanganke/clip-vit-large-patch14_stl10
+stl10: tanganke/clip-vit-large-patch14_stl10
@@ -1 +1 @@
-sun397: tanganke/clip-vit-large-patch14_sun397
+sun397: tanganke/clip-vit-large-patch14_sun397
@@ -1 +1 @@
-svhn: tanganke/clip-vit-large-patch14_svhn
+svhn: tanganke/clip-vit-large-patch14_svhn
```
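Each of the one-line configs above maps a dataset key to a fine-tuned checkpoint on the Hugging Face Hub. A minimal sketch of loading one of them directly with transformers (model id taken from the hunks above; assumes Hub access):

```python
from transformers import CLIPProcessor, CLIPVisionModel

# Fine-tuned vision backbone plus the processor of its base model.
model = CLIPVisionModel.from_pretrained("tanganke/clip-vit-large-patch14_sun397")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
```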
```diff
@@ -1,5 +1,4 @@
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
-
 models:
   _pretrained_:
     _target_: fusion_bench.models.linearized.vision_model.load_fft_vision_model_hf
@@ -44,10 +43,8 @@ models:
     base_model_name: openai/clip-vit-base-patch16
     peft_name: tanganke/clip-vit-base-patch16_dtd_lora-16
     merge_and_unload: true
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: openai/clip-vit-base-patch16
-
 train_datasets: null
 test_datasets: null
```
```diff
@@ -6,14 +6,11 @@
 # ...
 defaults:
   - CLIPVisionModelPool@: _template
-
 models:
   _pretrained_:
     _target_: transformers.CLIPVisionModel.from_pretrained
     pretrained_model_name_or_path: ${...base_model}
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: ${..base_model}
-
 base_model: openai/clip-vit-base-patch16
```
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml
CHANGED
```diff
@@ -1,14 +1,11 @@
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
-
 models:
   sun397:
     _target_: fusion_bench.models.linearized.vision_model.load_lora_vision_model_hf
     base_model_name: openai/clip-vit-base-patch16
     peft_name: tanganke/clip-vit-base-patch16_sun397_lora-16
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: openai/clip-vit-base-patch16
-
 train_datasets: null
 test_datasets: null
```
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml
CHANGED
```diff
@@ -12,13 +12,10 @@ defaults:
   - clip-vit-base-patch32_dtd
   - /dataset/image_classification/train@train_datasets:
     - tiny-imagenet
-
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
 _recursive_: false
-
 models: ???
 train_datasets: ???
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: openai/clip-vit-base-patch32
```
```diff
@@ -1,13 +1,10 @@
 defaults:
   - CLIPVisionModelPool@: _template
-
 models:
   _pretrained_:
     _target_: transformers.CLIPVisionModel.from_pretrained
     pretrained_model_name_or_path: ${...base_model}
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: ${..base_model}
-
 base_model: openai/clip-vit-base-patch32
```
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml
CHANGED
```diff
@@ -3,13 +3,10 @@ defaults:
   - clip-vit-base-patch32
   - clip-vit-base-patch32_sun397
   - clip-vit-base-patch32_stanford-cars
-
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
 _recursive_: false
-
 train_datasets: null
 test_datasets: null
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: openai/clip-vit-base-patch32
```
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml
CHANGED
```diff
@@ -2,17 +2,13 @@ defaults:
   - _self_
   - /dataset/image_classification/train@train_datasets:
     - tiny-imagenet
-
 _target_: fusion_bench.modelpool.CLIPVisionModelPool
 _recursive_: false
-
 models:
   _pretrained_: openai/clip-vit-base-patch32
   model_1: tanganke/clip-vit-base-patch32_sun397
   model_2: tanganke/clip-vit-base-patch32_stanford-cars
-
 train_datasets: ???
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: openai/clip-vit-base-patch32
```
```diff
@@ -6,14 +6,11 @@
 # ...
 defaults:
   - CLIPVisionModelPool@: _template
-
 models:
   _pretrained_:
     _target_: transformers.CLIPVisionModel.from_pretrained
     pretrained_model_name_or_path: ${...base_model}
-
 processor:
   _target_: transformers.CLIPProcessor.from_pretrained
   pretrained_model_name_or_path: ${..base_model}
-
 base_model: openai/clip-vit-large-patch14
```
```diff
@@ -1,17 +1,13 @@
 _target_: fusion_bench.modelpool.CausalLMPool
-
 pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
-
 models:
   _pretrained_:
     _target_: transformers.AutoModelForCausalLM.from_pretrained
     pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
     torch_dtype: bfloat16
-
 tokenizer:
   _target_: transformers.AutoTokenizer.from_pretrained
   pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
-
 train_datasets:
   alpaca-cleaned:
     _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
```
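The `models._pretrained_` and `tokenizer` blocks above (and in the next few hunks) resolve to standard transformers loading calls. A minimal sketch, assuming access to the gated meta-llama checkpoint on the Hugging Face Hub:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# What the CausalLMPool config instantiates at load time.
name = "meta-llama/Llama-3.2-1B-Instruct"
model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(name)
```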
```diff
@@ -1,17 +1,13 @@
 _target_: fusion_bench.modelpool.CausalLMPool
-
 pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
-
 models:
   _pretrained_:
     _target_: transformers.AutoModelForCausalLM.from_pretrained
     pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
     torch_dtype: bfloat16
-
 tokenizer:
   _target_: transformers.AutoTokenizer.from_pretrained
   pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
-
 train_datasets:
   codealpaca:
     _target_: fusion_bench.dataset.llama.alpaca.load_tokenized_alpaca_dataset
```
```diff
@@ -1,17 +1,13 @@
 _target_: fusion_bench.modelpool.CausalLMPool
-
 pretrained_model_name_or_path: meta-llama/Llama-3.2-1B-Instruct
-
 models:
   _pretrained_:
     _target_: transformers.AutoModelForCausalLM.from_pretrained
     pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
     torch_dtype: bfloat16
-
 tokenizer:
   _target_: transformers.AutoTokenizer.from_pretrained
   pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
-
 train_datasets:
   metamathqa:
     _target_: fusion_bench.dataset.llama.metamathqa.load_tokenized_metamathqa
```
```diff
@@ -1,17 +1,13 @@
 _target_: fusion_bench.modelpool.CausalLMPool
-
 pretrained_model_name_or_path: meta-llama/Llama-3-1B-Instruct
-
 models:
   _pretrained_:
     _target_: transformers.AutoModelForCausalLM.from_pretrained
     pretrained_model_name_or_path: ${...pretrained_model_name_or_path}
     torch_dtype: bfloat16
-
 tokenizer:
   _target_: transformers.AutoTokenizer.from_pretrained
   pretrained_model_name_or_path: ${..pretrained_model_name_or_path}
-
 train_datasets:
   ultrachat-200k:
     _target_: fusion_bench.dataset.llama.ultrachat.load_tokenized_ultrachat_200k
```
```diff
@@ -6,12 +6,9 @@ models:
   _pretrained_:
     _target_: transformers.LlamaForCausalLM.from_pretrained
     pretrained_model_name_or_path: ${...base_model}
-
 model_kwargs:
   torch_dtype: float16
-
 tokenizer:
   _target_: transformers.AutoTokenizer.from_pretrained
   pretrained_model_name_or_path: ${..base_model}
-
 base_model: decapoda-research/llama-7b-hf
```