fusion-bench 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. fusion_bench/compat/method/__init__.py +3 -1
  2. fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
  3. fusion_bench/constants/clip_vision.py +22 -0
  4. fusion_bench/dataset/clip_dataset.py +10 -2
  5. fusion_bench/dataset/gsm8k.py +2 -2
  6. fusion_bench/method/__init__.py +12 -2
  7. fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
  8. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
  9. fusion_bench/method/doge_ta/__init__.py +2 -0
  10. fusion_bench/method/{DOGE_TA → doge_ta}/clip_layer_wise_adamerging.py +1 -1
  11. fusion_bench/method/{DOGE_TA/DOGE_TA.py → doge_ta/doge_ta.py} +1 -1 (see the import note after this list)
  12. fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
  13. fusion_bench/method/gossip/__init__.py +3 -0
  14. fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
  15. fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
  16. fusion_bench/method/gossip/entropy_loss.py +25 -0
  17. fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
  18. fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
  19. fusion_bench/method/gossip/min_norm_solvers.py +227 -0
  20. fusion_bench/method/gossip/task_wise_gossip.py +265 -0
  21. fusion_bench/method/gossip/utils.py +74 -0
  22. fusion_bench/method/isotropic_merging/__init__.py +1 -1
  23. fusion_bench/method/opcm/opcm.py +102 -84
  24. fusion_bench/method/opcm/task_arithmetic.py +35 -21
  25. fusion_bench/method/opcm/ties_merging.py +71 -52
  26. fusion_bench/method/pwe_moe/module.py +1 -1
  27. fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
  28. fusion_bench/method/regmean/regmean.py +25 -17
  29. fusion_bench/method/smile_upscaling/__init__.py +1 -1
  30. fusion_bench/method/smile_upscaling/smile_upscaling.py +13 -10
  31. fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
  32. fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
  33. fusion_bench/method/ties_merging/ties_merging.py +36 -31
  34. fusion_bench/method/we_moe/we_moe.py +14 -15
  35. fusion_bench/mixins/__init__.py +6 -3
  36. fusion_bench/mixins/hydra_config.py +49 -0
  37. fusion_bench/mixins/openclip_classification.py +11 -0
  38. fusion_bench/mixins/simple_profiler.py +4 -2
  39. fusion_bench/modelpool/__init__.py +3 -1
  40. fusion_bench/modelpool/base_pool.py +2 -2
  41. fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
  42. fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
  43. fusion_bench/models/open_clip/__init__.py +6 -0
  44. fusion_bench/models/open_clip/modeling.py +176 -0
  45. fusion_bench/models/open_clip/utils.py +311 -0
  46. fusion_bench/models/open_clip/variables_and_paths.py +56 -0
  47. fusion_bench/models/parameter_dict.py +54 -13
  48. fusion_bench/models/wrappers/layer_wise_fusion.py +1 -46
  49. fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py +4 -119
  50. fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
  51. fusion_bench/taskpool/__init__.py +5 -3
  52. fusion_bench/taskpool/clip_vision/__init__.py +1 -0
  53. fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
  54. fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
  55. fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
  56. fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
  57. fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
  58. fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
  59. fusion_bench/taskpool/gpt2_text_classification.py +30 -1
  60. fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
  61. fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
  62. fusion_bench/utils/data.py +12 -0
  63. fusion_bench/utils/devices.py +14 -0
  64. fusion_bench/utils/instantiate.py +12 -0
  65. fusion_bench/utils/misc.py +9 -2
  66. fusion_bench/utils/packages.py +14 -0
  67. fusion_bench/utils/parameters.py +1 -1
  68. fusion_bench/utils/tensorboard.py +1 -1
  69. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/METADATA +15 -2
  70. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/RECORD +198 -158
  71. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL +1 -1
  72. fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
  73. fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
  74. fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
  75. fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
  76. fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
  77. fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
  78. fusion_bench_config/fabric/auto.yaml +0 -1
  79. fusion_bench_config/fabric/llama_ddp.yaml +0 -1
  80. fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
  81. fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
  82. fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
  83. fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
  84. fusion_bench_config/fabric_model_fusion.yaml +0 -1
  85. fusion_bench_config/llama_full_finetune.yaml +0 -2
  86. fusion_bench_config/llama_model_fusion.yaml +0 -2
  87. fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
  88. fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
  89. fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
  90. fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
  91. fusion_bench_config/method/adamerging.yaml +2 -2
  92. fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
  93. fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
  94. fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
  95. fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
  96. fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
  97. fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
  98. fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
  99. fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
  100. fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
  101. fusion_bench_config/method/dare/simple_average.yaml +0 -1
  102. fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
  103. fusion_bench_config/method/dare/ties_merging.yaml +0 -2
  104. fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
  105. fusion_bench_config/method/{DOGE_TA/DOGE_TA.yaml → doge_ta/doge_ta.yaml} +1 -1
  106. fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
  107. fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
  108. fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
  109. fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
  110. fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
  111. fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
  112. fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
  113. fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
  114. fusion_bench_config/method/linear/llama_expo.yaml +0 -3
  115. fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
  116. fusion_bench_config/method/linear/weighted_average.yaml +0 -1
  117. fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
  118. fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
  119. fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
  120. fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
  121. fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
  122. fusion_bench_config/method/model_recombination.yaml +0 -1
  123. fusion_bench_config/method/opcm/opcm.yaml +0 -1
  124. fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
  125. fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
  126. fusion_bench_config/method/opcm/weight_average.yaml +0 -1
  127. fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
  128. fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
  129. fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
  130. fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
  131. fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
  132. fusion_bench_config/method/slerp/slerp.yaml +0 -2
  133. fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
  134. fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
  135. fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
  136. fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
  137. fusion_bench_config/method/task_arithmetic.yaml +1 -1
  138. fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
  139. fusion_bench_config/method/ties_merging.yaml +1 -1
  140. fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
  141. fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
  142. fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
  143. fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
  144. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
  145. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
  146. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
  147. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
  148. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
  149. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
  150. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
  151. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
  152. fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
  153. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
  154. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
  155. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
  156. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
  157. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
  158. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
  159. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
  160. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
  161. fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
  162. fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
  163. fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
  164. fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
  165. fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
  166. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
  167. fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
  168. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
  169. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
  170. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
  171. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
  172. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
  173. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
  174. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
  175. fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
  176. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
  177. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
  178. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +8 -10
  179. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +66 -0
  180. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
  181. fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
  182. fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
  183. fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
  184. fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
  185. fusion_bench_config/nyuv2_config.yaml +0 -2
  186. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
  187. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
  188. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
  189. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
  190. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
  191. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
  192. fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
  193. fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
  194. fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
  195. fusion_bench/method/DOGE_TA/__init__.py +0 -2
  196. fusion_bench/method/{DOGE_TA → doge_ta}/layer_wise_adamerging.py +0 -0
  197. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/entry_points.txt +0 -0
  198. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info/licenses}/LICENSE +0 -0
  199. {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/top_level.txt +0 -0
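
Note on the DOGE_TA → doge_ta rename (entries 10, 11, 105, and 196 above): code importing the method through the old module path will break on 0.2.13. Below is a minimal compatibility sketch, assuming the algorithm class keeps its name across the rename; its top-level re-export (`_target_: fusion_bench.method.DOGE_TA_Algorithm` in fusion_bench_config/method/doge_ta/doge_ta.yaml) is unchanged in this diff.

# Hedged sketch: tolerate the DOGE_TA -> doge_ta module rename.
# The submodule paths mirror the renamed files in the list above;
# the class name DOGE_TA_Algorithm is assumed unchanged by the rename.
try:
    from fusion_bench.method.doge_ta.doge_ta import DOGE_TA_Algorithm  # >= 0.2.13
except ImportError:
    from fusion_bench.method.DOGE_TA.DOGE_TA import DOGE_TA_Algorithm  # <= 0.2.11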

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.2)
+ Generator: setuptools (80.3.1)
  Root-Is-Purelib: true
  Tag: py3-none-any

fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml
@@ -3,10 +3,9 @@ defaults:
  - fabric: auto
  # --- Model, Method, Task ---
  - modelpool: CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted
- - method: dummy
+ - method: dummy # change this to the method you want to use
  - taskpool: CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted
  - _self_
-
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
  fast_dev_run: false # Run a single batch of data to test the model or method

fusion_bench_config/dataset/image_classification/test/TALL20.yaml
@@ -25,4 +25,3 @@ defaults:
  - emnist_letters
  - kmnist
  - rendered-sst2
-

fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml
@@ -2,4 +2,3 @@ emnist_letters:
  _target_: datasets.load_dataset
  path: tanganke/emnist_letters
  split: test
-

fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml
@@ -1,4 +1,4 @@
  fashion_mnist:
  _target_: datasets.load_dataset
  path: zalando-datasets/fashion_mnist
- split: test
+ split: test
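
For context on what these dataset entries mean: each is a Hydra/OmegaConf `_target_` spec, so instantiating the fashion_mnist block above amounts to the call below (a sketch; the remaining keys map directly onto the Hugging Face datasets API named in `_target_`).

# What instantiating the fashion_mnist config above performs:
# `_target_: datasets.load_dataset`, with the other keys passed as kwargs.
from datasets import load_dataset

ds = load_dataset(path="zalando-datasets/fashion_mnist", split="test")
print(ds[0])  # one test example, e.g. an "image" and a "label" field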

fusion_bench_config/dataset/image_classification/train/TALL20.yaml
@@ -25,4 +25,3 @@ defaults:
  - emnist_letters
  - kmnist
  - rendered-sst2
-

fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml
@@ -1,4 +1,4 @@
  fashion_mnist:
  _target_: datasets.load_dataset
  path: zalando-datasets/fashion_mnist
- split: train
+ split: train

fusion_bench_config/fabric/auto.yaml
@@ -1,7 +1,6 @@
  defaults:
  - loggers: tensorboard_logger
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/llama_ddp.yaml
@@ -1,7 +1,6 @@
  defaults:
  - loggers: tensorboard_logger
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/llama_fsdp.yaml
@@ -2,7 +2,6 @@ defaults:
  - loggers: tensorboard_logger
  - strategy: llama_fsdp
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/llama_peft_fsdp.yaml
@@ -2,7 +2,6 @@ defaults:
  - loggers: tensorboard_logger
  - strategy: llama_peft_fsdp
  - _self_
-
  _target_: lightning.Fabric
  _recursive_: true
  # Number of devices to train on (``int``), which GPUs to train on (``list`` or ``str``), or ``"auto"``.

fusion_bench_config/fabric/strategy/deepspeed.yaml
@@ -1,6 +1,5 @@
  # https://lightning.ai/docs/fabric/2.4.0/api/generated/lightning.fabric.strategies.DeepSpeedStrategy.html#deepspeedstrategy
  _target_: lightning.fabric.strategies.DeepSpeedStrategy
-
  accelerator: null
  zero_optimization: true
  stage: 2

fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml
@@ -6,4 +6,3 @@ auto_wrap_policy:
  _target_: fusion_bench.mixins.lightning_fabric.get_size_based_auto_wrap_policy
  activation_checkpointing_policy: ${.auto_wrap_policy}
  # limit_all_gathers: true
-

fusion_bench_config/fabric_model_fusion.yaml
@@ -6,7 +6,6 @@ defaults:
  - method: dummy
  - taskpool: dummy
  - _self_
-
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
  fast_dev_run: false # Run a single batch of data to test the model or method

fusion_bench_config/llama_full_finetune.yaml
@@ -6,10 +6,8 @@ defaults:
  - modelpool: CausalLMPool/llama_alpaca_cleaned.yaml
  - taskpool: dummy
  - _self_
-
  _target_: fusion_bench.programs.FabricModelFusionProgram
  _recursive_: false
-
  fast_dev_run: false # Run a single batch of data to test the model or method
  # Run the script without actually running the experiment, use with `print_config=true`.
  # You can also use `--cfg` or `-c` to show the configuration instead of running.

fusion_bench_config/llama_model_fusion.yaml
@@ -3,13 +3,11 @@ defaults:
  - override modelpool: CausalLMPool/single_llama_model
  - override taskpool: dummy
  - _self_
-
  merged_model_save_path: null # path to save the merged model, use "{log_dir}" to refer to the logger directory, for example `merged_model_save_path=\{log_dir\}/merged_model`
  merged_model_save_kwargs:
  save_tokenizer: true
  # tokenizer_kwargs:
  # unk_token: "<s>" # https://github.com/huggingface/transformers/issues/24318#issuecomment-1596801322
-
  modelpool:
  model_kwargs:
  torch_dtype: float16

fusion_bench_config/method/ada_svd/clip_vision.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.AdaSVDMergingForCLIPVisionModel
-
  scaling_factor: null
  num_samples: 256
  gate_k: 16

fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml
@@ -1,23 +1,18 @@
  _target_: fusion_bench.method.adamerging.flan_t5_layer_wise_adamerging.FlanT5LayerWiseAdaMergingAlgorithm
  _recursive_: false
-
  optimizer:
  _target_: torch.optim.Adam
  lr: 1e-3
-
  dataloader_kwargs:
  batch_size: 4
  num_workers: 0
-
  init_values: 0.3
  max_steps: 1000
  # if `merging_weights_path` is specified, the merging weights will be loaded from the file and skip the training process
  merging_weights_load_path: null
  merging_weights_save_path: null
-
  variant: null
  clamp_weights: false
  tie_weights: false
  strict: false
-
  cache_dir: "outputs/cache"

fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml
@@ -1,23 +1,18 @@
  _target_: fusion_bench.method.adamerging.gpt2_layer_wise_adamerging.GPT2LayerWiseAdaMergingAlgorithm
  _recursive_: false
-
  optimizer:
  _target_: torch.optim.Adam
  lr: 1e-4
-
  dataloader_kwargs:
  batch_size: 16
  num_workers: 0
-
  init_values: 0.3
  max_steps: 1000
  # if `merging_weights_path` is specified, the merging weights will be loaded from the file and skip the training process
  merging_weights_load_path: null
  merging_weights_save_path: null
-
  variant: null
  clamp_weights: false
  tie_weights: true
  strict: false
-
  cache_dir: "outputs/cache"

fusion_bench_config/method/adamerging/llama_sft.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.adamerging.llama_adamerging.LayerWiseAdaMergingForLlamaSFT
-
  seed: 0
  output_dir: null
  # path to initialize the merging weights
@@ -26,7 +25,6 @@ fast_dev_run: ${fast_dev_run}
  # the path for saving the merging weights
  save_interval: 100
  save_merged_model: true
-
  dataloader_kwargs:
  batch_size: 24
  num_workers: 0

fusion_bench_config/method/adamerging.yaml
@@ -1,5 +1,5 @@
- # this option can be "clip_task_wise_adamerging"
- name: ???
+ # this option can be one of "clip_task_wise_adamerging" or "clip_layer_wise_adamerging"
+ name: clip_layer_wise_adamerging
  # this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
  # if weights is specified, skip the test-time adaptation training
  weights: null
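
The change from `name: ???` to a concrete default above is more than cosmetic: in OmegaConf (which Hydra uses for these configs), `???` marks a mandatory missing value that raises on access, so 0.2.11 forced users to pick a variant explicitly while 0.2.13 defaults to layer-wise AdaMerging. A minimal sketch of the old behavior:

# Sketch of why `name: ???` required an explicit override before 0.2.13:
# OmegaConf treats ??? as a mandatory missing value.
from omegaconf import OmegaConf
from omegaconf.errors import MissingMandatoryValue

cfg = OmegaConf.create({"name": "???"})
assert OmegaConf.is_missing(cfg, "name")
try:
    _ = cfg.name
except MissingMandatoryValue:
    print("name must be set before use")  # 0.2.13 now defaults it instead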

fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml
@@ -3,4 +3,3 @@ plot_heatmap: true
  trainable_only: true
  max_points_per_model: null
  output_path: null
-

fusion_bench_config/method/analysis/task_vector_violin_plot.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.TaskVectorViolinPlot
-
  trainable_only: true
  max_points_per_model: 1000
  fig_kwargs: null

fusion_bench_config/method/classification/clip_continual_finetune.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.ContinualImageClassificationFineTuningForCLIP
-
  seed: 42
  # shuffle the order of the tasks
  shuffle_order: true

fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml
@@ -3,7 +3,6 @@ name: clip_concrete_layer_wise_adamerging
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  merge_dtype: null
  optimizer: adam
  lr: 1e-3

fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml
@@ -3,7 +3,6 @@ name: clip_concrete_task_wise_adamerging
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  merge_dtype: null
  optimizer: adam
  lr: 1e-3

fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml
@@ -1,38 +1,27 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_post_defense_AWM
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
-
  scaling_factor: 0.3
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.01
-
-
  max_steps: 2000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml
@@ -1,41 +1,30 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_post_defense_SAU
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
-
  scaling_factor: 0.3
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.01
  shared_weight: 0.01
  beta1: 0.5
  beta2: 0.5
-
-
  max_steps: 2000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml
@@ -1,39 +1,30 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_safe_concrete_layer_wise_adamerging
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
  base_lr: 1
  adamerging_lr: 1e-3
-
  scaling_factor: 0.3
-
  max_steps: 1000
  max_adamerging_steps: 1000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.1
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml
@@ -1,40 +1,27 @@
  # Reference: Jinluan Yang, et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. ICLR 2025.
-
  name: clip_safe_concrete_task_arithmetic
-
  # batch size per gpu
  # if you have multiple gpus, the total batch size will be `batch_size * num_gpus`
  batch_size: 16
  num_workers: 8
-
  optimizer: adam
  lr: 1e-3
-
  scaling_factor: 0.3
-
-
-
  ###new
- adv_lr: 1e-4
+ adv_lr: 1e-4
  trigger_norm: 1000
  adv_weight: 0.1
-
-
  max_steps: 2000
  save_interval: 500
  initial_logits: 0
  temperature: 0.5
-
  # "discrete" or "continuous", this is the mask applied for evaluation, not during training
  # the performance of final model are expected to be similar
  eval_mask_type: continuous
-
  mask_checkpoint: null
  # if `clamp_weights` is true, the weights will be clamped to [0, 1]
  clamp_weights: false
-
  # arguments of `functional_call`
  tie_weights: true
  strict: false
-
  cache_dir: outputs

fusion_bench_config/method/dare/simple_average.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.DareSimpleAverage
-
  sparsity_ratio: 0.5
  only_on_linear_weights: false
  rescale: true

fusion_bench_config/method/dare/task_arithmetic.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.DareTaskArithmetic
-
  scaling_factor: 0.3
  sparsity_ratio: 0.5
  only_on_linear_weights: false

fusion_bench_config/method/dare/ties_merging.yaml
@@ -1,10 +1,8 @@
  _target_: fusion_bench.method.dare.DareTiesMerging
-
  # === DARE parameters ===
  sparsity_ratio: 0.5
  only_on_linear_weights: false
  rescale: true
-
  # === Ties merging parameters ===
  # Scaling factor $\lambda$
  scaling_factor: 0.5

fusion_bench_config/method/dawe/dawe_for_clip.yaml
@@ -4,7 +4,6 @@ merge_mode: task_wise
  init_lambda: 0.3
  batch_reduce: true
  eval_batch_reduce: false
-
  _dict_feature_extractor_path: microsoft/resnet-18
  dict_processor:
  _target_: fusion_bench.method.dawe.dawe_for_clip.load_resnet_processor
@@ -18,14 +17,12 @@ gate_hidden_layers: 1
  # if task_vector_dtype is null, the task vector will have the same dtype as pretrained model
  task_vector_dtype: null
  task_vector_sparsity: 0
-
  # training & logging args
  max_steps: 1000
  save_interval: 200
  learning_rate: 1e-5
  resume_checkpoint_path: null
  skip_training: false
-
  # dataloader args
  batch_size: 1
  num_workers: 0

fusion_bench_config/method/doge_ta/doge_ta.yaml
@@ -1,4 +1,4 @@
  _target_: fusion_bench.method.DOGE_TA_Algorithm
  subspace: 6
  K: 30
- lamda: 0.07
+ lamda: 0.07

fusion_bench_config/method/ensemble/max_model_predictor.yaml
@@ -1 +1 @@
- _target_: fusion_bench.method.MaxModelPredictorAlgorithm
+ _target_: fusion_bench.method.MaxModelPredictorAlgorithm

fusion_bench_config/method/ensemble/simple_ensemble.yaml
@@ -1,2 +1 @@
  _target_: fusion_bench.method.SimpleEnsembleAlgorithm
-

fusion_bench_config/method/ensemble/weighted_ensemble.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.WeightedEnsembleAlgorithm
-
  normalize: true
  # this should be a list of floats, one for each model in the ensemble
  # If weights is null, the ensemble will use the default weights, which are equal weights for all models.

fusion_bench_config/method/gossip/layer_wise_clip.yaml (new file)
@@ -0,0 +1,30 @@
+ # this option can be "clip_task_wise_gossip"
+ name: clip_layer_wise_gossip
+ # _target_: fusion_bench.method.CLIPLayerWiseGossipAlgorithm
+ # this weights can be a list of float, or a string that points to a *.np, *.pt file containing the weights
+ # if weights is specified, skip the test-time adaptation training
+ weights: null
+ # learning rate
+ optimizer: adam
+ lr: 1e-3
+ init_values: 0.3
+ # if `clamp_weights` is true, the weights will be clamped to [0, 1]
+ clamp_weights: false
+ # arguments of `functional_call`
+ tie_weights: true
+ strict: false
+ # this is overrided by `fabric.devices` if launched from the `fusion_bench` CLI.
+ devices: 1
+ batch_size: 16
+ num_workers: 8
+ max_steps: 400 # 1000
+ fast_dev_run: ${fast_dev_run}
+ # the path for saving the merging weights
+ save_merging_weights: 'merging_weights.pt'
+ cache_dir: outputs
+ # this is the parameter about gossip
+ gossip_max_steps: 20
+ gossip_skip_adamerging: false
+ accuracy_test_interval: 0 # if this value is equal to 1, we will evaluate all models each time after Gossip
+ improve_dataset: true
+ topo: ring
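
To try the new Gossip method, this file can be composed like any other `method` group entry. A hedged sketch using Hydra's compose API (the config directory path is a placeholder for a local checkout, and the sketch assumes the remaining defaults of `fabric_model_fusion`, shown earlier in this diff, resolve without further overrides):

# Hedged sketch: compose the new gossip config as a `method` group override.
# "/path/to/fusion_bench_config" is a placeholder for a local checkout.
from hydra import compose, initialize_config_dir

with initialize_config_dir(config_dir="/path/to/fusion_bench_config", version_base=None):
    cfg = compose(
        config_name="fabric_model_fusion",
        overrides=["method=gossip/layer_wise_clip"],
    )
print(cfg.method.name)  # clip_layer_wise_gossip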

fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml (new file)
@@ -0,0 +1,25 @@
+ _target_: fusion_bench.method.gossip.flan_t5_layer_wise_gossip.FlanT5LayerWiseGossipAlgorithm
+ _recursive_: false
+ optimizer:
+ _target_: torch.optim.Adam
+ lr: 1e-3
+ dataloader_kwargs:
+ batch_size: 4
+ num_workers: 0
+ init_values: 0.3
+ max_steps: 400
+ # if `merging_weights_path` is specified, the merging weights will be loaded from the file and skip the training process
+ merging_weights_load_path: null
+ merging_weights_save_path: null
+ variant: null
+ clamp_weights: false
+ tie_weights: false
+ strict: false
+ weights: null
+ cache_dir: "outputs/cache"
+ # this is the parameter about gossip
+ gossip_max_steps: 20
+ gossip_skip_adamerging: false
+ accuracy_test_interval: 0 #if this value is equal to 1, we will evaluate all models each time after Gossip [1, 5, 10, 15, 20] it can also be a list #
+ improve_dataset: true
+ topo: ring

fusion_bench_config/method/isotropic_merging/iso_c.yaml
@@ -1,4 +1,3 @@
  _target_: fusion_bench.method.ISO_C_Merge
-
  scaling_factor: 1.0
  exclude_keys: null

fusion_bench_config/method/isotropic_merging/iso_cts.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.ISO_CTS_Merge
-
  scaling_factor: 1.0
  common_space_fraction: 0.8
  exclude_keys: null

fusion_bench_config/method/linear/linear_interpolation.yaml
@@ -1,3 +1,2 @@
  _target_: fusion_bench.method.LinearInterpolationAlgorithm
-
  t: 0.5

fusion_bench_config/method/linear/llama_expo.yaml
@@ -7,13 +7,10 @@
  _target_: fusion_bench.method.ExPOAlgorithmForLlama
  extrapolation_factor: 0.1
  attention_scaling_factor: 1.0
-
  only_on_backbone: true
  on_linear_weights: true
  on_linear_bias: false
  on_embedding: false
-
  fix_last_n_layers: 0
  fix_first_n_layers: 0
-
  magnitude_sparsity_ratio: null

fusion_bench_config/method/linear/llama_expo_with_dare.yaml
@@ -1,18 +1,13 @@
  _target_: fusion_bench.method.linear.llama_expo.ExPOWithDareForLLama
-
  extrapolation_factor: 0.1
  attention_scaling_factor: 1.0
-
  only_on_backbone: true
  on_linear_weights: true
  on_linear_bias: false
  on_embedding: false
-
  fix_last_n_layers: 0
  fix_first_n_layers: 0
-
  magnitude_sparsity_ratio: null
-
  # dare arguments
  dare_sparsity_ratio: 0.5
  dare_only_on_linear_weights: true

fusion_bench_config/method/linear/weighted_average.yaml
@@ -1,5 +1,4 @@
  _target_: fusion_bench.method.WeightedAverageAlgorithm
-
  normalize: true # if true, the weights will be normalized before merging
  weights: # List of weights for each model
  - 0.5

fusion_bench_config/method/linear/weighted_average_for_llama.yaml
@@ -1,5 +1,4 @@
  _target_: WeightedAverageForLLama
-
  normalize: true # if true, the weights will be normalized before merging
  weights: # List of weights for each model
  - 0.5

fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml
@@ -1,12 +1,10 @@
  _target_: fusion_bench.method.BradleyTerryRewardModeling
  _recursive_: False
-
  optimizer:
  _target_: torch.optim.AdamW
  lr: 1e-5
  weight_decay: 0.01
  fused: null
-
  lr_scheduler:
  _target_: fusion_bench.optim.lr_scheduler.CosineDecayWithWarmup
  T_max: _T_max_ # this will be replaced by the expected number of training steps
@@ -14,13 +12,11 @@ lr_scheduler:
  warmup_steps: 100
  max_lr: ${..optimizer.lr}
  min_lr: 1e-6
-
  dataloader_kwargs:
  # per-gpu batch size
  batch_size: 1
  num_workers: 0
  pin_memory: True
-
  # Training hyperparameters
  # if max_epochs=-1, max_steps will be used to determine the number of training steps
  max_epochs: 3
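
The `max_lr: ${..optimizer.lr}` line above uses OmegaConf's relative interpolation: from `lr_scheduler`, `..` walks up to the config root, so the scheduler's ceiling always tracks the optimizer's learning rate. A minimal sketch:

# Minimal sketch of the relative interpolation used in bradley_terry_rm.yaml:
# from `lr_scheduler`, `..` refers to the parent node, so `${..optimizer.lr}`
# resolves to the optimizer's learning rate.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    """
optimizer:
  lr: 1e-5
lr_scheduler:
  max_lr: ${..optimizer.lr}
"""
)
assert cfg.lr_scheduler.max_lr == cfg.optimizer.lr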