fusion-bench 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/dataset/clip_dataset.py +1 -0
- fusion_bench/method/__init__.py +4 -0
- fusion_bench/method/adamerging/__init__.py +28 -5
- fusion_bench/method/adamerging/resnet_adamerging.py +279 -0
- fusion_bench/method/adamerging/task_wise_adamerging.py +2 -14
- fusion_bench/method/adamerging/utils.py +58 -0
- fusion_bench/method/classification/clip_finetune.py +6 -4
- fusion_bench/method/classification/image_classification_finetune.py +156 -12
- fusion_bench/method/dare/simple_average.py +3 -2
- fusion_bench/method/dare/task_arithmetic.py +3 -2
- fusion_bench/method/dop/__init__.py +1 -0
- fusion_bench/method/dop/dop.py +366 -0
- fusion_bench/method/dop/min_norm_solvers.py +227 -0
- fusion_bench/method/dop/utils.py +73 -0
- fusion_bench/method/simple_average.py +6 -4
- fusion_bench/mixins/lightning_fabric.py +9 -0
- fusion_bench/modelpool/causal_lm/causal_lm.py +2 -1
- fusion_bench/modelpool/resnet_for_image_classification.py +285 -4
- fusion_bench/models/hf_clip.py +4 -7
- fusion_bench/models/hf_utils.py +4 -1
- fusion_bench/taskpool/__init__.py +2 -0
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -1
- fusion_bench/taskpool/resnet_for_image_classification.py +231 -0
- fusion_bench/utils/state_dict_arithmetic.py +91 -10
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/METADATA +9 -3
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/RECORD +140 -77
- fusion_bench_config/fabric/auto.yaml +1 -1
- fusion_bench_config/fabric/loggers/swandb_logger.yaml +5 -0
- fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
- fusion_bench_config/fabric_model_fusion.yaml +1 -0
- fusion_bench_config/method/adamerging/resnet.yaml +18 -0
- fusion_bench_config/method/bitdelta/bitdelta.yaml +3 -0
- fusion_bench_config/method/classification/clip_finetune.yaml +5 -0
- fusion_bench_config/method/classification/image_classification_finetune.yaml +9 -0
- fusion_bench_config/method/depth_upscaling.yaml +9 -0
- fusion_bench_config/method/dop/dop.yaml +30 -0
- fusion_bench_config/method/dummy.yaml +6 -0
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +6 -0
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +8 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +8 -0
- fusion_bench_config/method/linear/expo.yaml +5 -0
- fusion_bench_config/method/linear/linear_interpolation.yaml +8 -0
- fusion_bench_config/method/linear/llama_expo.yaml +5 -0
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +3 -0
- fusion_bench_config/method/linear/simple_average_for_causallm.yaml +5 -0
- fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +3 -0
- fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +5 -0
- fusion_bench_config/method/linear/weighted_average.yaml +3 -0
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +6 -1
- fusion_bench_config/method/mixtral_moe_merging.yaml +3 -0
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +5 -0
- fusion_bench_config/method/model_recombination.yaml +8 -0
- fusion_bench_config/method/model_stock/model_stock.yaml +4 -1
- fusion_bench_config/method/opcm/opcm.yaml +5 -0
- fusion_bench_config/method/opcm/task_arithmetic.yaml +6 -0
- fusion_bench_config/method/opcm/ties_merging.yaml +5 -0
- fusion_bench_config/method/opcm/weight_average.yaml +5 -0
- fusion_bench_config/method/regmean/clip_regmean.yaml +3 -0
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +3 -0
- fusion_bench_config/method/regmean/regmean.yaml +3 -0
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +3 -0
- fusion_bench_config/method/simple_average.yaml +9 -0
- fusion_bench_config/method/slerp/slerp.yaml +9 -0
- fusion_bench_config/method/slerp/slerp_lm.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +6 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +3 -0
- fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml +3 -0
- fusion_bench_config/method/task_arithmetic.yaml +9 -0
- fusion_bench_config/method/ties_merging.yaml +3 -0
- fusion_bench_config/method/wudi/wudi.yaml +3 -0
- fusion_bench_config/model_fusion.yaml +2 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/_generate_config.py +138 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_svhn.yaml +14 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_svhn.yaml +14 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_svhn.yaml +14 -0
- fusion_bench_config/method/clip_finetune.yaml +0 -26
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/top_level.txt +0 -0
fusion_bench_config/fabric_model_fusion.yaml
@@ -32,6 +32,7 @@ _recursive_: false # Disable recursive instantiation
 # =============================================================================
 # Experiment Execution Settings
 # =============================================================================
+seed: null # Random seed for reproducibility
 # Development and debugging options
 fast_dev_run: false # This option is for quick testing. For example, run single batch instead of full dataset
 dry_run: false # Show configuration without running experiment
fusion_bench_config/method/adamerging/resnet.yaml
@@ -0,0 +1,18 @@
+# for layer-wise adamerging, use fusion_bench.method.ResNetLayerWiseAdamerging
+_target_: fusion_bench.method.ResNetTaskWiseAdamerging
+max_steps: 1000
+init_values: null
+resume_weights_path: null
+# if `clamp_weights` is true, the weights will be clamped to [0, 1]
+clamp_weights: false
+# arguments of `functional_call`
+tie_weights: true
+strict: false
+# optimizer and lr scheduler for test-time adaptation training
+optimizer:
+  _target_: torch.optim.Adam
+  lr: 1e-3
+lr_scheduler: null
+dataloader_kwargs:
+  batch_size: 16
+  num_workers: 4
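For context, task-wise AdaMerging learns one merging coefficient per fine-tuned model and optimizes these coefficients at test time (the optimizer above, Adam with lr 1e-3, trains them on unlabeled test batches, typically by minimizing prediction entropy). The sketch below only illustrates the merging step that such coefficients control; the function name and signature are hypothetical, not the FusionBench API.

```python
import torch

def task_wise_merge(pretrained_sd, finetuned_sds, coefficients, clamp_weights=False):
    """Illustrative sketch: theta = theta_0 + sum_i w_i * (theta_i - theta_0).

    `coefficients` is one learnable scalar tensor per fine-tuned model.
    With `clamp_weights=True` the coefficients are clamped to [0, 1],
    mirroring the `clamp_weights` option in the config above.
    """
    if clamp_weights:
        coefficients = [w.clamp(0, 1) for w in coefficients]
    merged = {}
    for name, base in pretrained_sd.items():
        delta = sum(w * (sd[name] - base) for w, sd in zip(coefficients, finetuned_sds))
        merged[name] = base + delta
    return merged
```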
fusion_bench_config/method/bitdelta/bitdelta.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: BitDelta
+# =============================================================================
 _target_: fusion_bench.method.bitdelta.BitDeltaAlgorithm
 save_dir: null
 save_full_model: false
fusion_bench_config/method/classification/clip_finetune.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: CLIP Finetune
+# =============================================================================
+# Finetunes CLIP models with optional LoRA adapters.
+# =============================================================================
 name: clip_finetune
 seed: 42
 learning_rate: 1e-5
fusion_bench_config/method/classification/image_classification_finetune.yaml
@@ -1,6 +1,15 @@
 _target_: fusion_bench.method.classification.ImageClassificationFineTuning
 max_epochs: 10
 max_steps: null
+# if ``save_top_k == k``,
+# the best k models according to the quantity monitored will be saved.
+# If ``save_top_k == 0``, no models are saved.
+# If ``save_top_k == -1``, all models are saved.
+save_top_k: 1
+# Interval (in epochs or steps, determined by `max_epochs` and `max_steps`) between checkpoints.
+save_interval: 1
+save_on_train_epoch_end: true
+training_data_ratio: null
 label_smoothing: 0
 optimizer:
   _target_: torch.optim.SGD
fusion_bench_config/method/depth_upscaling.yaml
@@ -1,3 +1,12 @@
+# =============================================================================
+# FusionBench Method Configuration: Depth Upscaling
+# =============================================================================
+# Constructs a deeper model by stacking/selecting layers from existing models.
+#
+# - layer_indices: list[int | str] specifying which layers to use. Strings are Python
+#   expressions evaluated to lists, e.g., "range(6,12)".
+# - Example: [0, 2, 4, "range(6,12)"] selects 1st, 3rd, 5th, and 7th-12th layers.
+# =============================================================================
 _target_: DepthUpscalingAlgorithm
 # this should be a list of integers or string, indicating the sequence of layers.
 # If the entry is an integer, it will use the n-th layer of the model.
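The config comments above describe how `layer_indices` entries expand: integers select single layers, strings are Python expressions that evaluate to index ranges. A minimal sketch of that expansion, with a hypothetical helper name (not the FusionBench implementation):

```python
def expand_layer_indices(layer_indices):
    """Expand entries like [0, 2, 4, "range(6,12)"] into a flat list of layer indices.

    Integers are used as-is; strings are evaluated as Python expressions that
    yield an iterable of indices.
    """
    indices = []
    for entry in layer_indices:
        if isinstance(entry, int):
            indices.append(entry)
        else:
            indices.extend(eval(entry))  # e.g. "range(6,12)" -> 6, 7, ..., 11
    return indices

# [0, 2, 4, "range(6,12)"] -> [0, 2, 4, 6, 7, 8, 9, 10, 11]
print(expand_layer_indices([0, 2, 4, "range(6,12)"]))
```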
fusion_bench_config/method/dop/dop.yaml
@@ -0,0 +1,30 @@
+_target_: fusion_bench.method.dop.dop.ContinualDOPForCLIP
+
+# the random seed to use
+seed: null
+# shuffle the order of the models
+shuffle_order: true
+# save the merged model on every step
+save_on_every_step: false
+# evaluate the merged model on every step
+evaluate_on_every_step: true
+
+# optimizer (learning rate)
+lr: 1e-4
+# optimizer (num_steps)
+num_steps: 200
+
+# weighted loss
+# if mgda is true, use mgda to optimize the loss weights
+mgda: true
+# if mgda is false, this is the weight for the loss of the first task
+alpha: 0.8
+# if mgda is true and ema is ture, using exponential moving average (ema), alpha is the initial value
+ema: true
+# if mgda is true and ema is ture, using exponential moving average (ema), beta is the decay rate
+ema_beta: 0.999
+
+# epsilon for svd (the proportion of energy retained)
+svd_epsilon: 0.99999
+# the space to project the delta w (left singular vectors, right singular vectors, or both)
+svd_proj_space: uv # u or v or uv
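The config describes `svd_epsilon` as the proportion of energy retained by the SVD. A common way to turn such a threshold into a rank is to keep the smallest number of singular values whose squared magnitudes cover the requested fraction of the total. The helper below is a sketch of that reading only; the name and exact rule used by fusion_bench.method.dop may differ.

```python
import torch

def rank_for_energy(matrix: torch.Tensor, epsilon: float = 0.99999) -> int:
    """Smallest rank r such that the top-r singular values retain at least
    `epsilon` of the total squared (spectral) energy. Illustrative only."""
    s = torch.linalg.svdvals(matrix)
    energy = s.pow(2)
    ratio = energy.cumsum(0) / energy.sum()
    idx = int(torch.searchsorted(ratio, torch.tensor(epsilon, dtype=ratio.dtype)).item())
    return min(idx + 1, s.numel())
```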
fusion_bench_config/method/dummy.yaml
@@ -1 +1,7 @@
+# =============================================================================
+# FusionBench Method Configuration: Dummy
+# =============================================================================
+# No-op method for testing pipelines and wiring.
+# Instantiates and exits without modifying models.
+# =============================================================================
 _target_: fusion_bench.method.DummyAlgorithm
fusion_bench_config/method/ensemble/max_model_predictor.yaml
@@ -1 +1,7 @@
+# =============================================================================
+# FusionBench Method Configuration: Max Model Predictor
+# =============================================================================
+# Selects the model with maximum confidence or performance per example/task.
+# No additional hyperparameters are required.
+# =============================================================================
 _target_: fusion_bench.method.MaxModelPredictorAlgorithm
fusion_bench_config/method/ensemble/simple_ensemble.yaml
@@ -1,2 +1,9 @@
+# =============================================================================
+# FusionBench Method Configuration: Simple Ensemble
+# =============================================================================
+# Averages model predictions uniformly.
+#
+# device_map: leave null for single device or provide a mapping for multi-device setups.
+# =============================================================================
 _target_: fusion_bench.method.SimpleEnsembleAlgorithm
-device_map: null
+device_map: null # Set to null for single device, or specify mapping
fusion_bench_config/method/ensemble/weighted_ensemble.yaml
@@ -1,3 +1,11 @@
+# =============================================================================
+# FusionBench Method Configuration: Weighted Ensemble
+# =============================================================================
+# Ensembles model predictions using specified per-model weights.
+#
+# - Set normalize=true to rescale weights to sum to 1.
+# - weights: one float per model in the pool (order-sensitive). If null, uses equal weights.
+# =============================================================================
 _target_: fusion_bench.method.WeightedEnsembleAlgorithm
 normalize: true
 # this should be a list of floats, one for each model in the ensemble
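As the new header comments describe, the weighted ensemble combines per-model outputs with order-sensitive weights, optionally normalized to sum to 1. A minimal sketch of that combination (hypothetical helper, not the FusionBench class):

```python
import torch

def weighted_ensemble(logits_per_model, weights, normalize=True):
    """Combine per-model outputs with per-model weights.

    `logits_per_model` is a list of tensors of shape (batch, num_classes),
    one per model, in the same order as `weights`.
    """
    weights = torch.as_tensor(weights, dtype=logits_per_model[0].dtype)
    if normalize:
        weights = weights / weights.sum()          # rescale to sum to 1
    stacked = torch.stack(logits_per_model)         # (num_models, batch, num_classes)
    return (weights.view(-1, 1, 1) * stacked).sum(0)  # (batch, num_classes)
```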
fusion_bench_config/method/linear/expo.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: ExPO
+# =============================================================================
+# Extrapolates from pretrained to finetuned direction by a factor.
+# =============================================================================
 # This algorithm merges a pretrained model with a finetuned model.
 #
 # $$\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$$
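The formula quoted in the config, $\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$, maps directly onto state-dict arithmetic. A minimal sketch, with a hypothetical helper name and the extrapolation factor playing the role of $\alpha$:

```python
def expo_merge(pretrained_sd, finetuned_sd, extrapolation_factor):
    """ExPO-style extrapolation on matching state dicts:
    theta_merged = theta_ft + alpha * (theta_ft - theta_pre)."""
    return {
        name: finetuned_sd[name]
        + extrapolation_factor * (finetuned_sd[name] - pretrained_sd[name])
        for name in finetuned_sd
    }
```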
fusion_bench_config/method/linear/linear_interpolation.yaml
@@ -1,2 +1,10 @@
+# =============================================================================
+# FusionBench Method Configuration: Linear Interpolation
+# =============================================================================
+# Interpolates between two models: (1 - t) * model0 + t * model1
+#
+# - t in [0,1]: 0 returns model0; 1 returns model1.
+# - Only meaningful for two-model pools.
+# =============================================================================
 _target_: fusion_bench.method.LinearInterpolationAlgorithm
 t: 0.5
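The interpolation rule stated in the header, (1 - t) * model0 + t * model1, applies parameter-wise. A minimal sketch on state dicts (hypothetical helper name):

```python
def linear_interpolate(sd0, sd1, t=0.5):
    """(1 - t) * model0 + t * model1 on matching state dicts.
    t=0 returns model0; t=1 returns model1."""
    return {name: (1 - t) * sd0[name] + t * sd1[name] for name in sd0}
```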
fusion_bench_config/method/linear/llama_expo.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: ExPO for LLaMA
+# =============================================================================
+# LLaMA-specific ExPO with backbone-only and attention scaling options.
+# =============================================================================
 # This algorithm merges a pretrained model with a finetuned model.
 #
 # $$\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$$
fusion_bench_config/method/linear/llama_expo_with_dare.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: ExPO with DARE (LLaMA)
+# =============================================================================
 _target_: fusion_bench.method.linear.llama_expo.ExPOWithDareForLLama
 extrapolation_factor: 0.1
 attention_scaling_factor: 1.0
fusion_bench_config/method/linear/simple_average_for_causallm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Simple Average (Causal LM)
+# =============================================================================
+# Uniformly averages causal LM weights with optional backbone-only.
+# =============================================================================
 _target_: fusion_bench.method.SimpleAverageForCausalLM
 # set `merge_backbone` to true if you has a base model and only want to merge the backbone of the experts
 # if `merge_backbone` is False, this is equivalent to `SimpleAverageAlgorithm`
fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Task Arithmetic (Causal LM)
+# =============================================================================
 _target_: fusion_bench.method.TaskArithmeticForCausalLM
 scaling_factor: 0.3
 merge_backbone: false
fusion_bench_config/method/linear/ties_merging_for_causallm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: TIES Merging (Causal LM)
+# =============================================================================
+# TIES merging adapted for causal language models with optional backbone-only.
+# =============================================================================
 _target_: fusion_bench.method.TiesMergingForCausalLM
 # Scaling factor $\lambda$
 scaling_factor: 0.3
fusion_bench_config/method/linear/weighted_average.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Weighted Average (Linear)
+# =============================================================================
 _target_: fusion_bench.method.WeightedAverageAlgorithm
 normalize: true # if true, the weights will be normalized before merging
 weights: # List of weights for each model
fusion_bench_config/method/linear/weighted_average_for_llama.yaml
@@ -1,4 +1,9 @@
-
+# =============================================================================
+# FusionBench Method Configuration: Weighted Average for LLaMA
+# =============================================================================
+# Like Weighted Average but supports merging only backbone and saving tokenizer.
+# =============================================================================
+_target_: fusion_bench.method.WeightedAverageForLLama
 normalize: true # if true, the weights will be normalized before merging
 weights: # List of weights for each model
   - 0.5
fusion_bench_config/method/mixtral_moe_merging.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Mixtral MoE Merging/Upscaling
+# =============================================================================
 name: mixtral_moe_upscaling # or "mixtral_for_causal_lm_moe_upscaling"
 experts_per_token: 2
 # path to save the upscaled model
fusion_bench_config/method/mixtral_moe_upscaling.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Mixtral MoE Upscaling
+# =============================================================================
+# Upscales a base model into a Mixture-of-Experts variant (Mixtral family).
+# =============================================================================
 # or fusion_bench.method.MixtralUpscalingAlgorithm
 _target_: fusion_bench.method.MixtralForCausalLMUpscalingAlgorithm
 num_experts: 4
fusion_bench_config/method/model_recombination.yaml
@@ -1,3 +1,11 @@
+# =============================================================================
+# FusionBench Method Configuration: Model Recombination
+# =============================================================================
+# Recombines submodules/layers from multiple models to form a new model.
+#
+# - return_modelpool: override run() argument to return model pool instead of merged model.
+#   Set to null to respect runtime argument; set to true/false to force behavior.
+# =============================================================================
 _target_: fusion_bench.method.ModelRecombinationAlgorithm
 # if `return_model_pool` is not null, the argument `return_modelpool` passed to the `run` method will be ignored.
 return_modelpool: null
fusion_bench_config/method/model_stock/model_stock.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Model Stock
+# =============================================================================
 _target_: fusion_bench.method.model_stock.ModelStock
 ignore_keys:
   [
@@ -9,4 +12,4 @@ ignore_keys:
     "model.ln_final.bias",
   ]
 model_save_path: ${path.log_dir}/checkpoint
-model_save_kwargs: null
+model_save_kwargs: null
fusion_bench_config/method/opcm/opcm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: OPCM
+# =============================================================================
+# Incrementally merges models via SVD projection and evaluation per step.
+# =============================================================================
 _target_: fusion_bench.method.opcm.opcm.OPCMForCLIP
 # shuffle the order of the models
 shuffle_order: true
fusion_bench_config/method/opcm/task_arithmetic.yaml
@@ -1,3 +1,9 @@
+# =============================================================================
+# FusionBench Method Configuration: Continual Task Arithmetic
+# =============================================================================
+# Applies task arithmetic incrementally across a stream of models.
+# Maintains per-step save/eval similar to OPCM.
+# =============================================================================
 _target_: fusion_bench.method.opcm.task_arithmetic.ContinualTaskArithmeticForCLIP
 scaling_factor: 0.3
 # shuffle the order of the models
fusion_bench_config/method/opcm/ties_merging.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Continual TIES Merging
+# =============================================================================
+# Continual variant of TIES merging with per-step save/eval instrumentation.
+# =============================================================================
 _target_: fusion_bench.method.opcm.ties_merging.ContinualTiesMergingForCLIP
 # Scaling factor $\lambda$
 scaling_factor: 0.5
fusion_bench_config/method/opcm/weight_average.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: Continual Weighted Average
+# =============================================================================
+# Incrementally averages model weights as new models arrive.
+# =============================================================================
 _target_: fusion_bench.method.opcm.weight_average.ContinualWeightAverageForCLIP
 # shuffle the order of the models
 shuffle_order: true
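Incremental averaging as new models arrive, as described in the header above, is a running mean over parameters. A minimal sketch of one update step (hypothetical helper, not the FusionBench implementation):

```python
def update_running_average(merged_sd, new_sd, num_merged):
    """Fold the (num_merged + 1)-th model into a running average of weights:
    merged <- (num_merged * merged + new) / (num_merged + 1)."""
    k = num_merged
    return {
        name: (k * merged_sd[name] + new_sd[name]) / (k + 1)
        for name in merged_sd
    }
```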
fusion_bench_config/method/regmean/clip_regmean.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean (CLIP)
+# =============================================================================
 _target_: fusion_bench.method.RegMeanAlgorithmForCLIP
 # list, regular expression of names of parameters that need to be excluded
 exclude_param_names_regex: []
fusion_bench_config/method/regmean/gpt2_regmean.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean (GPT-2)
+# =============================================================================
 _target_: fusion_bench.method.RegMeanAlgorithmForGPT2
 # list, regular expression of names of parameters that need to be excluded
 exclude_param_names_regex: []
fusion_bench_config/method/regmean/regmean.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean (Base)
+# =============================================================================
 _target_: ???
 num_regmean_examples: 256
 reduce_non_diagonal_ratio: 0.1
fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: RegMean++ (CLIP)
+# =============================================================================
 _target_: fusion_bench.method.RegMeanAlgorithmForCLIPPlusPlus
 # list, regular expression of names of parameters that need to be excluded
 exclude_param_names_regex: []
fusion_bench_config/method/simple_average.yaml
@@ -1 +1,10 @@
+# =============================================================================
+# FusionBench Method Configuration: Simple Average
+# =============================================================================
+# Equally averages parameters of all models in the model pool.
+#
+# Usage notes
+# - No hyperparameters required; behavior is deterministic given model order.
+# - Ensure models are architecture-compatible (same shapes) before merging.
+# =============================================================================
 _target_: fusion_bench.method.SimpleAverageAlgorithm
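Simple averaging, as the header describes, is a uniform mean over the parameters of architecture-compatible models. A minimal sketch on state dicts (hypothetical helper name):

```python
def simple_average(state_dicts):
    """Uniformly average parameters across models with identical shapes."""
    n = len(state_dicts)
    return {
        name: sum(sd[name] for sd in state_dicts) / n
        for name in state_dicts[0]
    }
```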
fusion_bench_config/method/slerp/slerp.yaml
@@ -1,3 +1,12 @@
+# =============================================================================
+# FusionBench Method Configuration: Spherical Linear Interpolation (SLERP)
+# =============================================================================
+# Interpolates between two parameter vectors on a hypersphere.
+#
+# - t in [0,1]: interpolation factor; 0 returns model0; 1 returns model1.
+# - DOT_THRESHOLD: threshold to switch to linear interpolation when vectors are near-aligned.
+# - epsilon: small constant to avoid division by zero.
+# =============================================================================
 _target_: fusion_bench.method.SlerpMergeAlgorithm
 t: 0.5 # interpolation factor
 DOT_THRESHOLD: 0.9995
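The header explains the roles of `t`, `DOT_THRESHOLD`, and `epsilon`: interpolate along the great circle between two parameter vectors, and fall back to linear interpolation when they are nearly collinear. A minimal sketch of that rule on flattened vectors (hypothetical helper, not the FusionBench implementation):

```python
import torch

def slerp(v0: torch.Tensor, v1: torch.Tensor, t: float,
          DOT_THRESHOLD: float = 0.9995, epsilon: float = 1e-8) -> torch.Tensor:
    """Spherical linear interpolation between two flattened parameter vectors,
    with a linear fallback for near-aligned vectors."""
    v0_n = v0 / (v0.norm() + epsilon)
    v1_n = v1 / (v1.norm() + epsilon)
    dot = torch.clamp((v0_n * v1_n).sum(), -1.0, 1.0)
    if dot.abs() > DOT_THRESHOLD:
        # vectors nearly collinear: slerp is ill-conditioned, use lerp instead
        return (1 - t) * v0 + t * v1
    theta = torch.acos(dot)
    sin_theta = torch.sin(theta)
    w0 = torch.sin((1 - t) * theta) / sin_theta
    w1 = torch.sin(t * theta) / sin_theta
    return w0 * v0 + w1 * v1
```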
fusion_bench_config/method/slerp/slerp_lm.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: SLERP for Causal LM
+# =============================================================================
+# Spherical linear interpolation between two causal language models.
+# =============================================================================
 _target_: fusion_bench.method.SlerpForCausalLM
 t: 0.5
 model_save_path: ${path.log_dir}/checkpoint
fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml
@@ -1,3 +1,9 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling (Causal LM)
+# =============================================================================
+# Upscales causal language models (Qwen2/Llama/Mistral) with SMILE.
+# Supports auto-detection or explicit model_type override.
+# =============================================================================
 # Generic SMILE Upscaling Configuration for CausalLM models
 # Supports: Qwen2, Llama, Mistral models
 # The model type will be auto-detected from the base model
fusion_bench_config/method/smile_upscaling/error_accumulation.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Analysis Configuration: SMILE Error Accumulation
+# =============================================================================
+# Analyzes how approximation error accumulates with gating and rank parameters.
+# =============================================================================
 # Measure error accumulation
 _target_: fusion_bench.method.smile_upscaling.error_accumulation.ErrorAccumulationAnalysisForCLIP
 gate_k: 16
fusion_bench_config/method/smile_upscaling/projected_energy.yaml
@@ -1,2 +1,7 @@
+# =============================================================================
+# FusionBench Analysis Configuration: SMILE Projected Energy
+# =============================================================================
+# Measures projected energy retained by singular vectors during SMILE upscaling.
+# =============================================================================
 # Measure projected energy
 _target_: fusion_bench.method.smile_upscaling.projected_energy.ProjectedEnergyAnalysis
fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Singular Projection Merging
+# =============================================================================
 name: singular_projection_merging
 # merge device on cuda can accelerate the SVD computation
 device: cuda
fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling (Mistral)
+# =============================================================================
+# Specialized SMILE upscaling for Mistral models with rank settings.
+# =============================================================================
 _target_: fusion_bench.method.smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm
 # device to put the models on
 device: cpu
fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml
@@ -1,3 +1,8 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling (Qwen2)
+# =============================================================================
+# Specialized SMILE upscaling for Qwen2 models with rank settings.
+# =============================================================================
 _target_: fusion_bench.method.smile_upscaling.smile_qwen2_upscaling.SmileQwen2UpscalingAlgorithm
 # device to put the models on
 device: cpu
fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: SMILE Upscaling
+# =============================================================================
 _target_: fusion_bench.method.SmileUpscalingAlgorithm
 # merge device on cuda can accelerate the SVD computation
 device: cpu
fusion_bench_config/method/task_arithmetic.yaml
@@ -1,2 +1,11 @@
+# =============================================================================
+# FusionBench Method Configuration: Task Arithmetic
+# =============================================================================
+# Performs task vector arithmetic: base + lambda * \sum_i (task_i - base).
+#
+# Notes
+# - scaling_factor controls the contribution of the task delta.
+# - Model compatibility is required (matching parameter shapes).
+# =============================================================================
 _target_: fusion_bench.method.TaskArithmeticAlgorithm
 scaling_factor: 0.3
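The formula in the header, base + lambda * \sum_i (task_i - base), translates directly into state-dict arithmetic with `scaling_factor` as lambda. A minimal sketch (hypothetical helper, not the FusionBench implementation):

```python
def task_arithmetic(base_sd, task_sds, scaling_factor=0.3):
    """theta = theta_base + lambda * sum_i (theta_i - theta_base)
    over matching state dicts."""
    return {
        name: base + scaling_factor * sum(sd[name] - base for sd in task_sds)
        for name, base in base_sd.items()
    }
```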
fusion_bench_config/method/ties_merging.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: Ties Merging
+# =============================================================================
 _target_: fusion_bench.method.TiesMergingAlgorithm
 # Scaling factor $\lambda$
 scaling_factor: 0.3
fusion_bench_config/method/wudi/wudi.yaml
@@ -1,3 +1,6 @@
+# =============================================================================
+# FusionBench Method Configuration: WUDI Merging
+# =============================================================================
 _target_: fusion_bench.method.WUDIMerging
 
 iter_num: 400
fusion_bench_config/model_fusion.yaml
@@ -3,7 +3,7 @@
 # =============================================================================
 # This configuration file defines the settings for running model fusion experiments
 # within FusionBench using standard PyTorch (without Lightning Fabric).
-#
+#
 # The configuration includes:
 #
 # - Hydra framework settings and overrides
@@ -30,6 +30,7 @@ _recursive_: false # Disable recursive instantiation
 # =============================================================================
 # Experiment Execution Settings
 # =============================================================================
+seed: null # Random seed for reproducibility
 # Development and debugging options
 fast_dev_run: false # This option is for quick testing. For example, run single batch instead of full dataset
 dry_run: false # Show configuration without running experiment