fusion-bench 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/dataset/clip_dataset.py +1 -0
- fusion_bench/method/__init__.py +4 -0
- fusion_bench/method/adamerging/__init__.py +28 -5
- fusion_bench/method/adamerging/resnet_adamerging.py +279 -0
- fusion_bench/method/adamerging/task_wise_adamerging.py +2 -14
- fusion_bench/method/adamerging/utils.py +58 -0
- fusion_bench/method/classification/clip_finetune.py +6 -4
- fusion_bench/method/classification/image_classification_finetune.py +156 -12
- fusion_bench/method/dare/simple_average.py +3 -2
- fusion_bench/method/dare/task_arithmetic.py +3 -2
- fusion_bench/method/dop/__init__.py +1 -0
- fusion_bench/method/dop/dop.py +366 -0
- fusion_bench/method/dop/min_norm_solvers.py +227 -0
- fusion_bench/method/dop/utils.py +73 -0
- fusion_bench/method/simple_average.py +6 -4
- fusion_bench/mixins/lightning_fabric.py +9 -0
- fusion_bench/modelpool/causal_lm/causal_lm.py +2 -1
- fusion_bench/modelpool/resnet_for_image_classification.py +285 -4
- fusion_bench/models/hf_clip.py +4 -7
- fusion_bench/models/hf_utils.py +4 -1
- fusion_bench/taskpool/__init__.py +2 -0
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -1
- fusion_bench/taskpool/resnet_for_image_classification.py +231 -0
- fusion_bench/utils/state_dict_arithmetic.py +91 -10
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/METADATA +9 -3
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/RECORD +140 -77
- fusion_bench_config/fabric/auto.yaml +1 -1
- fusion_bench_config/fabric/loggers/swandb_logger.yaml +5 -0
- fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
- fusion_bench_config/fabric_model_fusion.yaml +1 -0
- fusion_bench_config/method/adamerging/resnet.yaml +18 -0
- fusion_bench_config/method/bitdelta/bitdelta.yaml +3 -0
- fusion_bench_config/method/classification/clip_finetune.yaml +5 -0
- fusion_bench_config/method/classification/image_classification_finetune.yaml +9 -0
- fusion_bench_config/method/depth_upscaling.yaml +9 -0
- fusion_bench_config/method/dop/dop.yaml +30 -0
- fusion_bench_config/method/dummy.yaml +6 -0
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +6 -0
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +8 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +8 -0
- fusion_bench_config/method/linear/expo.yaml +5 -0
- fusion_bench_config/method/linear/linear_interpolation.yaml +8 -0
- fusion_bench_config/method/linear/llama_expo.yaml +5 -0
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +3 -0
- fusion_bench_config/method/linear/simple_average_for_causallm.yaml +5 -0
- fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +3 -0
- fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +5 -0
- fusion_bench_config/method/linear/weighted_average.yaml +3 -0
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +6 -1
- fusion_bench_config/method/mixtral_moe_merging.yaml +3 -0
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +5 -0
- fusion_bench_config/method/model_recombination.yaml +8 -0
- fusion_bench_config/method/model_stock/model_stock.yaml +4 -1
- fusion_bench_config/method/opcm/opcm.yaml +5 -0
- fusion_bench_config/method/opcm/task_arithmetic.yaml +6 -0
- fusion_bench_config/method/opcm/ties_merging.yaml +5 -0
- fusion_bench_config/method/opcm/weight_average.yaml +5 -0
- fusion_bench_config/method/regmean/clip_regmean.yaml +3 -0
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +3 -0
- fusion_bench_config/method/regmean/regmean.yaml +3 -0
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +3 -0
- fusion_bench_config/method/simple_average.yaml +9 -0
- fusion_bench_config/method/slerp/slerp.yaml +9 -0
- fusion_bench_config/method/slerp/slerp_lm.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +6 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +3 -0
- fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml +3 -0
- fusion_bench_config/method/task_arithmetic.yaml +9 -0
- fusion_bench_config/method/ties_merging.yaml +3 -0
- fusion_bench_config/method/wudi/wudi.yaml +3 -0
- fusion_bench_config/model_fusion.yaml +2 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/_generate_config.py +138 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_svhn.yaml +14 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_svhn.yaml +14 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_svhn.yaml +14 -0
- fusion_bench_config/method/clip_finetune.yaml +0 -26
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/top_level.txt +0 -0
fusion_bench_config/fabric_model_fusion.yaml
@@ -32,6 +32,7 @@ _recursive_: false # Disable recursive instantiation
 # =============================================================================
 # Experiment Execution Settings
 # =============================================================================
+seed: null # Random seed for reproducibility
 # Development and debugging options
 fast_dev_run: false # This option is for quick testing. For example, run single batch instead of full dataset
 dry_run: false # Show configuration without running experiment
fusion_bench_config/method/adamerging/resnet.yaml
@@ -0,0 +1,18 @@
+# for layer-wise adamerging, use fusion_bench.method.ResNetLayerWiseAdamerging
+_target_: fusion_bench.method.ResNetTaskWiseAdamerging
+max_steps: 1000
+init_values: null
+resume_weights_path: null
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# optimizer and lr scheduler for test-time adaptation training
+optimizer:
+  _target_: torch.optim.Adam
+  lr: 1e-3
+lr_scheduler: null
+dataloader_kwargs:
+  batch_size: 16
+  num_workers: 4
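For context, task-wise AdaMerging learns one merging coefficient per fine-tuned model and optimizes these coefficients at test time (the optimizer above, Adam with lr 1e-3, trains them on unlabeled test batches, typically by minimizing prediction entropy). The sketch below only illustrates the merging step that such coefficients control; the function name and signature are hypothetical, not the FusionBench API.

```python
import torch

def task_wise_merge(pretrained_sd, finetuned_sds, coefficients, clamp_weights=False):
    """Illustrative sketch: theta = theta_0 + sum_i w_i * (theta_i - theta_0).

    `coefficients` is one learnable scalar tensor per fine-tuned model.
    With `clamp_weights=True` the coefficients are clamped to [0, 1],
    mirroring the `clamp_weights` option in the config above.
    """
    if clamp_weights:
        coefficients = [w.clamp(0, 1) for w in coefficients]
    merged = {}
    for name, base in pretrained_sd.items():
        delta = sum(w * (sd[name] - base) for w, sd in zip(coefficients, finetuned_sds))
        merged[name] = base + delta
    return merged
```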
fusion_bench_config/method/bitdelta/bitdelta.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: BitDelta
+# =============================================================================
 _target_: fusion_bench.method.bitdelta.BitDeltaAlgorithm
 save_dir: null
 save_full_model: false
fusion_bench_config/method/classification/clip_finetune.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: CLIP Finetune
+# =============================================================================
+# Finetunes CLIP models with optional LoRA adapters.
+# =============================================================================
 name: clip_finetune
 seed: 42
 learning_rate: 1e-5
fusion_bench_config/method/classification/image_classification_finetune.yaml
@@ -1,6 +1,15 @@
 _target_: fusion_bench.method.classification.ImageClassificationFineTuning
 max_epochs: 10
 max_steps: null
+# if ``save_top_k == k``,
+# the best k models according to the quantity monitored will be saved.
+# If ``save_top_k == 0``, no models are saved.
+# If ``save_top_k == -1``, all models are saved.
+save_top_k: 1
+# Interval (in epochs or steps, determined by `max_epochs` and `max_steps`) between checkpoints.
+save_interval: 1
+save_on_train_epoch_end: true
+training_data_ratio: null
 label_smoothing: 0
 optimizer:
   _target_: torch.optim.SGD
fusion_bench_config/method/depth_upscaling.yaml
@@ -1,3 +1,12 @@
+# =============================================================================
+# FusionBench Method Configuration: Depth Upscaling
+# =============================================================================
+# Constructs a deeper model by stacking/selecting layers from existing models.
+#
+# - layer_indices: list[int | str] specifying which layers to use. Strings are Python
+#   expressions evaluated to lists, e.g., "range(6,12)".
+# - Example: [0, 2, 4, "range(6,12)"] selects 1st, 3rd, 5th, and 7th-12th layers.
+# =============================================================================
 _target_: DepthUpscalingAlgorithm
 # this should be a list of integers or string, indicating the sequence of layers.
 # If the entry is an integer, it will use the n-th layer of the model.
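The config comments above describe how `layer_indices` entries expand: integers select single layers, strings are Python expressions that evaluate to index ranges. A minimal sketch of that expansion, with a hypothetical helper name (not the FusionBench implementation):

```python
def expand_layer_indices(layer_indices):
    """Expand entries like [0, 2, 4, "range(6,12)"] into a flat list of layer indices.

    Integers are used as-is; strings are evaluated as Python expressions that
    yield an iterable of indices.
    """
    indices = []
    for entry in layer_indices:
        if isinstance(entry, int):
            indices.append(entry)
        else:
            indices.extend(eval(entry))  # e.g. "range(6,12)" -> 6, 7, ..., 11
    return indices

# [0, 2, 4, "range(6,12)"] -> [0, 2, 4, 6, 7, 8, 9, 10, 11]
print(expand_layer_indices([0, 2, 4, "range(6,12)"]))
```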
fusion_bench_config/method/dop/dop.yaml
@@ -0,0 +1,30 @@
+_target_: fusion_bench.method.dop.dop.ContinualDOPForCLIP
+
+# the random seed to use
+seed: null
+# shuffle the order of the models
+shuffle_order: true
+# save the merged model on every step
+save_on_every_step: false
+# evaluate the merged model on every step
+evaluate_on_every_step: true
+
+# optimizer (learning rate)
+lr: 1e-4
+# optimizer (num_steps)
+num_steps: 200
+
+# weighted loss
+# if mgda is true, use mgda to optimize the loss weights
+mgda: true
+# if mgda is false, this is the weight for the loss of the first task
+alpha: 0.8
+# if mgda is true and ema is ture, using exponential moving average (ema), alpha is the initial value
+ema: true
+# if mgda is true and ema is ture, using exponential moving average (ema), beta is the decay rate
+ema_beta: 0.999
+
+# epsilon for svd (the proportion of energy retained)
+svd_epsilon: 0.99999
+# the space to project the delta w (left singular vectors, right singular vectors, or both)
+svd_proj_space: uv # u or v or uv
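The config describes `svd_epsilon` as the proportion of energy retained by the SVD. A common way to turn such a threshold into a rank is to keep the smallest number of singular values whose squared magnitudes cover the requested fraction of the total. The helper below is a sketch of that reading only; the name and exact rule used by fusion_bench.method.dop may differ.

```python
import torch

def rank_for_energy(matrix: torch.Tensor, epsilon: float = 0.99999) -> int:
    """Smallest rank r such that the top-r singular values retain at least
    `epsilon` of the total squared (spectral) energy. Illustrative only."""
    s = torch.linalg.svdvals(matrix)
    energy = s.pow(2)
    ratio = energy.cumsum(0) / energy.sum()
    idx = int(torch.searchsorted(ratio, torch.tensor(epsilon, dtype=ratio.dtype)).item())
    return min(idx + 1, s.numel())
```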
fusion_bench_config/method/dummy.yaml
@@ -1 +1,7 @@
+# =============================================================================
+# FusionBench Method Configuration: Dummy
+# =============================================================================
+# No-op method for testing pipelines and wiring.
+# Instantiates and exits without modifying models.
+# =============================================================================
 _target_: fusion_bench.method.DummyAlgorithm
fusion_bench_config/method/ensemble/max_model_predictor.yaml
@@ -1 +1,7 @@
+# =============================================================================
+# FusionBench Method Configuration: Max Model Predictor
+# =============================================================================
+# Selects the model with maximum confidence or performance per example/task.
+# No additional hyperparameters are required.
+# =============================================================================
 _target_: fusion_bench.method.MaxModelPredictorAlgorithm
fusion_bench_config/method/ensemble/simple_ensemble.yaml
@@ -1,2 +1,9 @@
+# =============================================================================
+# FusionBench Method Configuration: Simple Ensemble
+# =============================================================================
+# Averages model predictions uniformly.
+#
+# device_map: leave null for single device or provide a mapping for multi-device setups.
+# =============================================================================
 _target_: fusion_bench.method.SimpleEnsembleAlgorithm
-device_map: null
+device_map: null # Set to null for single device, or specify mapping
fusion_bench_config/method/ensemble/weighted_ensemble.yaml
@@ -1,3 +1,11 @@
+# =============================================================================
+# FusionBench Method Configuration: Weighted Ensemble
+# =============================================================================
+# Ensembles model predictions using specified per-model weights.
+#
+# - Set normalize=true to rescale weights to sum to 1.
+# - weights: one float per model in the pool (order-sensitive). If null, uses equal weights.
+# =============================================================================
 _target_: fusion_bench.method.WeightedEnsembleAlgorithm
 normalize: true
 # this should be a list of floats, one for each model in the ensemble
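As the new header comments describe, the weighted ensemble combines per-model outputs with order-sensitive weights, optionally normalized to sum to 1. A minimal sketch of that combination (hypothetical helper, not the FusionBench class):

```python
import torch

def weighted_ensemble(logits_per_model, weights, normalize=True):
    """Combine per-model outputs with per-model weights.

    `logits_per_model` is a list of tensors of shape (batch, num_classes),
    one per model, in the same order as `weights`.
    """
    weights = torch.as_tensor(weights, dtype=logits_per_model[0].dtype)
    if normalize:
        weights = weights / weights.sum()          # rescale to sum to 1
    stacked = torch.stack(logits_per_model)         # (num_models, batch, num_classes)
    return (weights.view(-1, 1, 1) * stacked).sum(0)  # (batch, num_classes)
```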
fusion_bench_config/method/linear/expo.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: ExPO
+# =============================================================================
+# Extrapolates from pretrained to finetuned direction by a factor.
+# =============================================================================
 # This algorithm merges a pretrained model with a finetuned model.
 #
 # $$\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$$
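The formula quoted in the config, $\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$, maps directly onto state-dict arithmetic. A minimal sketch, with a hypothetical helper name and the extrapolation factor playing the role of $\alpha$:

```python
def expo_merge(pretrained_sd, finetuned_sd, extrapolation_factor):
    """ExPO-style extrapolation on matching state dicts:
    theta_merged = theta_ft + alpha * (theta_ft - theta_pre)."""
    return {
        name: finetuned_sd[name]
        + extrapolation_factor * (finetuned_sd[name] - pretrained_sd[name])
        for name in finetuned_sd
    }
```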
fusion_bench_config/method/linear/linear_interpolation.yaml
@@ -1,2 +1,10 @@
+# =============================================================================
+# FusionBench Method Configuration: Linear Interpolation
+# =============================================================================
+# Interpolates between two models: (1 - t) * model0 + t * model1
+#
+# - t in [0,1]: 0 returns model0; 1 returns model1.
+# - Only meaningful for two-model pools.
+# =============================================================================
 _target_: fusion_bench.method.LinearInterpolationAlgorithm
 t: 0.5
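The interpolation rule stated in the header, (1 - t) * model0 + t * model1, applies parameter-wise. A minimal sketch on state dicts (hypothetical helper name):

```python
def linear_interpolate(sd0, sd1, t=0.5):
    """(1 - t) * model0 + t * model1 on matching state dicts.
    t=0 returns model0; t=1 returns model1."""
    return {name: (1 - t) * sd0[name] + t * sd1[name] for name in sd0}
```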
fusion_bench_config/method/linear/llama_expo.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: ExPO for LLaMA
+# =============================================================================
+# LLaMA-specific ExPO with backbone-only and attention scaling options.
+# =============================================================================
 # This algorithm merges a pretrained model with a finetuned model.
 #
 # $$\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$$
fusion_bench_config/method/linear/llama_expo_with_dare.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: ExPO with DARE (LLaMA)
+# =============================================================================
 _target_: fusion_bench.method.linear.llama_expo.ExPOWithDareForLLama
 extrapolation_factor: 0.1
 attention_scaling_factor: 1.0
fusion_bench_config/method/linear/simple_average_for_causallm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Simple Average (Causal LM)
+# =============================================================================
+# Uniformly averages causal LM weights with optional backbone-only.
+# =============================================================================
 _target_: fusion_bench.method.SimpleAverageForCausalLM
 # set `merge_backbone` to true if you has a base model and only want to merge the backbone of the experts
 # if `merge_backbone` is False, this is equivalent to `SimpleAverageAlgorithm`
fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Task Arithmetic (Causal LM)
+# =============================================================================
 _target_: fusion_bench.method.TaskArithmeticForCausalLM
 scaling_factor: 0.3
 merge_backbone: false
fusion_bench_config/method/linear/ties_merging_for_causallm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: TIES Merging (Causal LM)
+# =============================================================================
+# TIES merging adapted for causal language models with optional backbone-only.
+# =============================================================================
 _target_: fusion_bench.method.TiesMergingForCausalLM
 # Scaling factor $\lambda$
 scaling_factor: 0.3
fusion_bench_config/method/linear/weighted_average.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Weighted Average (Linear)
+# =============================================================================
 _target_: fusion_bench.method.WeightedAverageAlgorithm
 normalize: true # if true, the weights will be normalized before merging
 weights: # List of weights for each model
fusion_bench_config/method/linear/weighted_average_for_llama.yaml
@@ -1,4 +1,9 @@
-
+# =============================================================================
+# FusionBench Method Configuration: Weighted Average for LLaMA
+# =============================================================================
+# Like Weighted Average but supports merging only backbone and saving tokenizer.
+# =============================================================================
+_target_: fusion_bench.method.WeightedAverageForLLama
 normalize: true # if true, the weights will be normalized before merging
 weights: # List of weights for each model
   - 0.5
fusion_bench_config/method/mixtral_moe_merging.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Mixtral MoE Merging/Upscaling
+# =============================================================================
 name: mixtral_moe_upscaling # or "mixtral_for_causal_lm_moe_upscaling"
 experts_per_token: 2
 # path to save the upscaled model
fusion_bench_config/method/mixtral_moe_upscaling.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Mixtral MoE Upscaling
+# =============================================================================
+# Upscales a base model into a Mixture-of-Experts variant (Mixtral family).
+# =============================================================================
 # or fusion_bench.method.MixtralUpscalingAlgorithm
 _target_: fusion_bench.method.MixtralForCausalLMUpscalingAlgorithm
 num_experts: 4
fusion_bench_config/method/model_recombination.yaml
@@ -1,3 +1,11 @@
+# =============================================================================
+# FusionBench Method Configuration: Model Recombination
+# =============================================================================
+# Recombines submodules/layers from multiple models to form a new model.
+#
+# - return_modelpool: override run() argument to return model pool instead of merged model.
+#   Set to null to respect runtime argument; set to true/false to force behavior.
+# =============================================================================
 _target_: fusion_bench.method.ModelRecombinationAlgorithm
 # if `return_model_pool` is not null, the argument `return_modelpool` passed to the `run` method will be ignored.
 return_modelpool: null
fusion_bench_config/method/model_stock/model_stock.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Model Stock
+# =============================================================================
 _target_: fusion_bench.method.model_stock.ModelStock
 ignore_keys:
   [
@@ -9,4 +12,4 @@ ignore_keys:
     "model.ln_final.bias",
   ]
 model_save_path: ${path.log_dir}/checkpoint
-model_save_kwargs: null
+model_save_kwargs: null
fusion_bench_config/method/opcm/opcm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: OPCM
+# =============================================================================
+# Incrementally merges models via SVD projection and evaluation per step.
+# =============================================================================
 _target_: fusion_bench.method.opcm.opcm.OPCMForCLIP
 # shuffle the order of the models
 shuffle_order: true
fusion_bench_config/method/opcm/task_arithmetic.yaml
@@ -1,3 +1,9 @@
+# =============================================================================
+# FusionBench Method Configuration: Continual Task Arithmetic
+# =============================================================================
+# Applies task arithmetic incrementally across a stream of models.
+# Maintains per-step save/eval similar to OPCM.
+# =============================================================================
 _target_: fusion_bench.method.opcm.task_arithmetic.ContinualTaskArithmeticForCLIP
 scaling_factor: 0.3
 # shuffle the order of the models
fusion_bench_config/method/opcm/ties_merging.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Continual TIES Merging
+# =============================================================================
+# Continual variant of TIES merging with per-step save/eval instrumentation.
+# =============================================================================
 _target_: fusion_bench.method.opcm.ties_merging.ContinualTiesMergingForCLIP
 # Scaling factor $\lambda$
 scaling_factor: 0.5
fusion_bench_config/method/opcm/weight_average.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Continual Weighted Average
+# =============================================================================
+# Incrementally averages model weights as new models arrive.
+# =============================================================================
 _target_: fusion_bench.method.opcm.weight_average.ContinualWeightAverageForCLIP
 # shuffle the order of the models
 shuffle_order: true
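Incremental averaging as new models arrive, as described in the header above, is a running mean over parameters. A minimal sketch of one update step (hypothetical helper, not the FusionBench implementation):

```python
def update_running_average(merged_sd, new_sd, num_merged):
    """Fold the (num_merged + 1)-th model into a running average of weights:
    merged <- (num_merged * merged + new) / (num_merged + 1)."""
    k = num_merged
    return {
        name: (k * merged_sd[name] + new_sd[name]) / (k + 1)
        for name in merged_sd
    }
```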
fusion_bench_config/method/regmean/clip_regmean.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean (CLIP)
+# =============================================================================
 _target_: fusion_bench.method.RegMeanAlgorithmForCLIP
 # list, regular expression of names of parameters that need to be excluded
 exclude_param_names_regex: []
fusion_bench_config/method/regmean/gpt2_regmean.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean (GPT-2)
+# =============================================================================
 _target_: fusion_bench.method.RegMeanAlgorithmForGPT2
 # list, regular expression of names of parameters that need to be excluded
 exclude_param_names_regex: []
fusion_bench_config/method/regmean/regmean.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean (Base)
+# =============================================================================
 _target_: ???
 num_regmean_examples: 256
 reduce_non_diagonal_ratio: 0.1
fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean++ (CLIP)
+# =============================================================================
 _target_: fusion_bench.method.RegMeanAlgorithmForCLIPPlusPlus
 # list, regular expression of names of parameters that need to be excluded
 exclude_param_names_regex: []
fusion_bench_config/method/simple_average.yaml
@@ -1 +1,10 @@
+# =============================================================================
+# FusionBench Method Configuration: Simple Average
+# =============================================================================
+# Equally averages parameters of all models in the model pool.
+#
+# Usage notes
+# - No hyperparameters required; behavior is deterministic given model order.
+# - Ensure models are architecture-compatible (same shapes) before merging.
+# =============================================================================
 _target_: fusion_bench.method.SimpleAverageAlgorithm
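Simple averaging, as the header describes, is a uniform mean over the parameters of architecture-compatible models. A minimal sketch on state dicts (hypothetical helper name):

```python
def simple_average(state_dicts):
    """Uniformly average parameters across models with identical shapes."""
    n = len(state_dicts)
    return {
        name: sum(sd[name] for sd in state_dicts) / n
        for name in state_dicts[0]
    }
```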
fusion_bench_config/method/slerp/slerp.yaml
@@ -1,3 +1,12 @@
+# =============================================================================
+# FusionBench Method Configuration: Spherical Linear Interpolation (SLERP)
+# =============================================================================
+# Interpolates between two parameter vectors on a hypersphere.
+#
+# - t in [0,1]: interpolation factor; 0 returns model0; 1 returns model1.
+# - DOT_THRESHOLD: threshold to switch to linear interpolation when vectors are near-aligned.
+# - epsilon: small constant to avoid division by zero.
+# =============================================================================
 _target_: fusion_bench.method.SlerpMergeAlgorithm
 t: 0.5 # interpolation factor
 DOT_THRESHOLD: 0.9995
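The header explains the roles of `t`, `DOT_THRESHOLD`, and `epsilon`: interpolate along the great circle between two parameter vectors, and fall back to linear interpolation when they are nearly collinear. A minimal sketch of that rule on flattened vectors (hypothetical helper, not the FusionBench implementation):

```python
import torch

def slerp(v0: torch.Tensor, v1: torch.Tensor, t: float,
          DOT_THRESHOLD: float = 0.9995, epsilon: float = 1e-8) -> torch.Tensor:
    """Spherical linear interpolation between two flattened parameter vectors,
    with a linear fallback for near-aligned vectors."""
    v0_n = v0 / (v0.norm() + epsilon)
    v1_n = v1 / (v1.norm() + epsilon)
    dot = torch.clamp((v0_n * v1_n).sum(), -1.0, 1.0)
    if dot.abs() > DOT_THRESHOLD:
        # vectors nearly collinear: slerp is ill-conditioned, use lerp instead
        return (1 - t) * v0 + t * v1
    theta = torch.acos(dot)
    sin_theta = torch.sin(theta)
    w0 = torch.sin((1 - t) * theta) / sin_theta
    w1 = torch.sin(t * theta) / sin_theta
    return w0 * v0 + w1 * v1
```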
fusion_bench_config/method/slerp/slerp_lm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: SLERP for Causal LM
+# =============================================================================
+# Spherical linear interpolation between two causal language models.
+# =============================================================================
 _target_: fusion_bench.method.SlerpForCausalLM
 t: 0.5
 model_save_path: ${path.log_dir}/checkpoint
fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml
@@ -1,3 +1,9 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling (Causal LM)
+# =============================================================================
+# Upscales causal language models (Qwen2/Llama/Mistral) with SMILE.
+# Supports auto-detection or explicit model_type override.
+# =============================================================================
 # Generic SMILE Upscaling Configuration for CausalLM models
 # Supports: Qwen2, Llama, Mistral models
 # The model type will be auto-detected from the base model
fusion_bench_config/method/smile_upscaling/error_accumulation.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Analysis Configuration: SMILE Error Accumulation
+# =============================================================================
+# Analyzes how approximation error accumulates with gating and rank parameters.
+# =============================================================================
 # Measure error accumulation
 _target_: fusion_bench.method.smile_upscaling.error_accumulation.ErrorAccumulationAnalysisForCLIP
 gate_k: 16
fusion_bench_config/method/smile_upscaling/projected_energy.yaml
@@ -1,2 +1,7 @@
+# =============================================================================
+# FusionBench Analysis Configuration: SMILE Projected Energy
+# =============================================================================
+# Measures projected energy retained by singular vectors during SMILE upscaling.
+# =============================================================================
 # Measure projected energy
 _target_: fusion_bench.method.smile_upscaling.projected_energy.ProjectedEnergyAnalysis
fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Singular Projection Merging
+# =============================================================================
 name: singular_projection_merging
 # merge device on cuda can accelerate the SVD computation
 device: cuda
fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling (Mistral)
+# =============================================================================
+# Specialized SMILE upscaling for Mistral models with rank settings.
+# =============================================================================
 _target_: fusion_bench.method.smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm
 # device to put the models on
 device: cpu
fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling (Qwen2)
+# =============================================================================
+# Specialized SMILE upscaling for Qwen2 models with rank settings.
+# =============================================================================
 _target_: fusion_bench.method.smile_upscaling.smile_qwen2_upscaling.SmileQwen2UpscalingAlgorithm
 # device to put the models on
 device: cpu
fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling
+# =============================================================================
 _target_: fusion_bench.method.SmileUpscalingAlgorithm
 # merge device on cuda can accelerate the SVD computation
 device: cpu
fusion_bench_config/method/task_arithmetic.yaml
@@ -1,2 +1,11 @@
+# =============================================================================
+# FusionBench Method Configuration: Task Arithmetic
+# =============================================================================
+# Performs task vector arithmetic: base + lambda * \sum_i (task_i - base).
+#
+# Notes
+# - scaling_factor controls the contribution of the task delta.
+# - Model compatibility is required (matching parameter shapes).
+# =============================================================================
 _target_: fusion_bench.method.TaskArithmeticAlgorithm
 scaling_factor: 0.3
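The formula in the header, base + lambda * \sum_i (task_i - base), translates directly into state-dict arithmetic with `scaling_factor` as lambda. A minimal sketch (hypothetical helper, not the FusionBench implementation):

```python
def task_arithmetic(base_sd, task_sds, scaling_factor=0.3):
    """theta = theta_base + lambda * sum_i (theta_i - theta_base)
    over matching state dicts."""
    return {
        name: base + scaling_factor * sum(sd[name] - base for sd in task_sds)
        for name, base in base_sd.items()
    }
```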
fusion_bench_config/method/ties_merging.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Ties Merging
+# =============================================================================
 _target_: fusion_bench.method.TiesMergingAlgorithm
 # Scaling factor $\lambda$
 scaling_factor: 0.3
fusion_bench_config/method/wudi/wudi.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: WUDI Merging
+# =============================================================================
 _target_: fusion_bench.method.WUDIMerging
 
 iter_num: 400
fusion_bench_config/model_fusion.yaml
@@ -3,7 +3,7 @@
 # =============================================================================
 # This configuration file defines the settings for running model fusion experiments
 # within FusionBench using standard PyTorch (without Lightning Fabric).
-#
+#
 # The configuration includes:
 #
 # - Hydra framework settings and overrides
@@ -30,6 +30,7 @@ _recursive_: false # Disable recursive instantiation
 # =============================================================================
 # Experiment Execution Settings
 # =============================================================================
+seed: null # Random seed for reproducibility
 # Development and debugging options
 fast_dev_run: false # This option is for quick testing. For example, run single batch instead of full dataset
 dry_run: false # Show configuration without running experiment