fusion-bench 0.2.3.tar.gz → 0.2.4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/PKG-INFO +14 -2
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/README.md +13 -1
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/__init__.py +2 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +13 -0
- fusion_bench-0.2.4/fusion_bench/method/dare/simple_average.py +31 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dare/task_arithmetic.py +14 -7
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/linear/__init__.py +2 -1
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/linear/expo.py +3 -3
- fusion_bench-0.2.4/fusion_bench/method/linear/llama_expo.py +229 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/magnitude_diff_pruning.py +6 -1
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/clip_classification.py +39 -3
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench.egg-info/PKG-INFO +14 -2
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench.egg-info/SOURCES.txt +4 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/dare/task_arithmetic.yaml +1 -0
- fusion_bench-0.2.4/fusion_bench_config/method/linear/llama_expo.yaml +19 -0
- fusion_bench-0.2.4/fusion_bench_config/method/linear/llama_expo_with_dare.yaml +19 -0
- fusion_bench-0.2.4/fusion_bench_config/method/pruning/magnitude_diff_pruning.yaml +4 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/pyproject.toml +1 -1
- fusion_bench-0.2.3/fusion_bench/method/dare/simple_average.py +0 -48
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/LICENSE +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/method/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/method/base_algorithm.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/modelpool/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/modelpool/base_pool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/modelpool/huggingface_clip_vision.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/taskpool/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/taskpool/base_pool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/taskpool/clip_image_classification.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/constants/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/constants/paths.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/clip_dataset.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/gpt2_glue.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/gsm8k.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/image_dataset.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/imdb.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/llama/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/llama/alpaca.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/llama/openai.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/llama/sharegpt.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/llama/squad.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/llama/wikitext.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/dataset/nyuv2.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/ada_svd/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/ada_svd/clip_vision.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/clip_task_wise_adamerging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/entropy_loss.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/layer_wise_adamerging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/llama_adamerging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/adamerging/task_wise_adamerging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/analysis/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/analysis/task_vector_cos_similarity.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/analysis/task_vector_violin_plot.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/base_algorithm.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/classification/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/classification/clip_finetune.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/concrete_subspace/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/concrete_subspace/clip_concrete_task_arithmetic.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dare/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dare/utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dawe/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dawe/dawe_for_clip.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dawe/warppers/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dawe/warppers/dawe_model.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/depth_upscaling/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/depth_upscaling/depth_upscaling.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/depth_upscaling/depth_upscaling_for_llama.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/dummy.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/ensemble.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/fisher_merging/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/fisher_merging/clip_fisher_merging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/fisher_merging/fisher_merging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/linear/linear_interpolation.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/linear/simple_average_for_llama.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/linear/task_arithmetic_for_llama.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/lm_finetune/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/lm_finetune/causal_lm_pretrain.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/mixture_of_experts/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/mixture_of_experts/mixtral_merging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/model_recombination.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/llama_magnitude_prune.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/llama_random_prune.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/llama_wanda_prune.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/prune_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/ablate.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/data.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/eval.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/layerwrapper.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/prune.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/prune_opt.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/wanda_utils/sparsegpt.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pwe_moe/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pwe_moe/clip_pwe_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pwe_moe/module.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pwe_moe/phn/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pwe_moe/phn/solvers.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pwe_moe/utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/regmean/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/regmean/clip_regmean.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/regmean/gpt2_regmean.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/regmean/regmean.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/simple_average.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/slerp/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/slerp/slerp.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/slerp/slerp_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/smile_upscaling/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/smile_upscaling/singular_projection_merging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/smile_upscaling/smile_upscaling.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/sparse_we_moe/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/sparse_we_moe/sparse_we_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/sparselo/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/sparselo/sparselo.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/task_arithmetic/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/task_arithmetic/task_arithmetic.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/ties_merging/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/ties_merging/ties_merging.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/ties_merging/ties_merging_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/trust_region/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/trust_region/clip_task_arithmetic.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/trust_region/utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/we_moe/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/we_moe/clip_we_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/we_moe/we_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/weighted_average/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/weighted_average/llama.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/weighted_average/weighted_average.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/nyuv2/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/nyuv2/depth.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/nyuv2/loss.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/nyuv2/noise.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/nyuv2/normal.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/nyuv2/segmentation.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/text_to_image_generation/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/text_to_image_generation/aesthetic_scorer.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/text_to_image_generation/compressibility.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/metrics/text_to_image_generation/pickscore_scorer.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/lightning_fabric.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/optim/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/optim/adamw_with_warmup.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/rich_live.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/serialization.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/mixins/simple_profiler.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/PeftModelForSeq2SeqLM.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/base_pool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/causal_lm/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/causal_lm/causal_lm.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/clip_vision/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/clip_vision/modelpool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/huggingface_automodel.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/huggingface_gpt2_classification.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/nyuv2_modelpool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/seq2seq_lm/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/modelpool/seq2seq_lm/modelpool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/hf_clip.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/linearized/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/linearized/linearized_model_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/linearized/vision_model.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/model_utils/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/model_utils/embedding.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/model_utils/liger_kernel.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/model_utils/misc.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/model_utils/mod.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/model_utils/visual.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/patcher.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/llama/tokenizer_loader.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/masks/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/masks/mask_model.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_losparse_llama/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_losparse_llama/configuration_losparse_llama.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_losparse_llama/losparse_linear.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_losparse_llama/modeling_losparse_llama.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_losparse_llama/register.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_losparse_llama/utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_smile_mistral/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_smile_mistral/configuration_smile_mistral.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/modeling_smile_mistral/register.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/nyuv2/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/nyuv2/aspp.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/nyuv2/lightning_module.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/nyuv2/resnet.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/nyuv2/resnet_dilated.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/parameter_dict.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/separate_io.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/smile_moe/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/smile_moe/linear.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/sparse_we_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/we_moe.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/wrappers/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/wrappers/ensemble.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/wrappers/layer_wise_fusion.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/models/wrappers/task_wise_fusion.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/optim/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/optim/mezo.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/programs/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/programs/base_program.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/programs/fabric_fusion_program.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/cli.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/clip/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/clip/convert_checkpoint.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/imgui.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/nyuv2_mtl_train.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/scripts/webui.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/base_pool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/clip_vision/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/clip_vision/taskpool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/dummy.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/gpt2_text_classification.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/llama/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/llama/test_generation.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/taskpool/nyuv2_taskpool.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/base_task.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/classification.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/cifar10.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/cifar100.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/clip_dataset.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/dtd.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/eurosat.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/flower102.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/gtsrb.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/imagenet.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/mnist.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/oxford_iiit_pet.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/rendered_sst2.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/resisc45.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/stanford_cars.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/stl10.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/sun397.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/svhn.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/clip_classification/tiny_imagenet.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/flan_t5_text_generation/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/flan_t5_text_generation/datasets_preprocess.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/auto.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/cache_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/data.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/devices.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/dtype.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/functools.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/hydra_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/instantiate.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/json.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/lazy_imports.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/misc.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/packages.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/parameters.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/path.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/pylogger.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/rich_utils.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/state_dict_arithmetic.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/strenum/__init__.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/strenum/_name_mangler.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/strenum/_version.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/timer.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/utils/type.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench.egg-info/dependency_links.txt +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench.egg-info/entry_points.txt +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench.egg-info/requires.txt +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench.egg-info/top_level.txt +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/README.md +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/cifar10.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/cifar100.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/dtd.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/eurosat.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/gtsrb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/resisc45.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/stanford-cars.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/sun397.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/svhn.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/the_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/test/tiny-imagenet.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/cifar10.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/cifar100.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/dtd.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/eurosat.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/gtsrb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/resisc45.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/stanford-cars.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/sun397.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/svhn.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/the_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/train/tiny-imagenet.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/dtd.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/eurosat.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/gtsrb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/resisc45.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/stanford-cars.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/sun397.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/svhn.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/image_classification/val/the_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/question_answering/search_qa.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/question_answering/test/search_qa.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/question_answering/train/MetaMathQA.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/question_answering/train/search_qa.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/question_answering/val/search_qa.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/summarization/test/xsum.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/summarization/train/xsum.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/summarization/val/xsum.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/summarization/xsum.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/text_generation/test/gsm-hard.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/text_generation/test/gsm8k.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/text_generation/test/gsm8k_question_label.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/text_generation/train/CodeAlpaca-20k.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/text_generation/train/gsm8k.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/dataset/text_generation/train/gsm8k_question_label.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/fabric/auto.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/fabric_logger/tensorboard_logger.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/fabric_model_fusion.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/hydra/default.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/hydra/help/fusion_bench_help.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/hydra/job_logging/rich_logging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/llama_magnitude_pruning.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/llama_model_fusion.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/llama_weighted_average.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/adamerging/clip.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/adamerging/llama_sft.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/adamerging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/clip_finetune.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/concrete_subspace/clip_concrete_task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/dare/simple_average.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/depth_upscaling.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/dummy.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/ensemble/max_model_predictor.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/fisher_merging/fisher_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/linear/expo.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/linear/linear_interpolation.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/linear/simple_average_for_llama.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/linear/weighted_average.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/magnitude_diff_pruning.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/mixtral_moe_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/mixtral_moe_upscaling.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/model_recombination.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/pruning/llama_magnitude_pruning.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/pruning/llama_random_pruning.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/pruning/llama_wanda_pruning.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/pwe_moe_ls_for_clip.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/regmean/clip_regmean.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/regmean/regmean.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/simple_average.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/slerp/slerp.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/ties_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_dtd.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_eurosat.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_gtsrb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_resisc45.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_stanford-cars.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_sun397.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch16_svhn.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_dtd.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_eurosat.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_gtsrb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_resisc45.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_stanford-cars.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_sun397.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-base-patch32_svhn.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_dtd.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_eight_tasks.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_eurosat.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_gtsrb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/clip-vit/generate_vit_model_config.sh +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-cola.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-cola_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mnli.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mrpc.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-mrpc_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qnli.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qqp.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-qqp_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-rte.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-rte_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-sst2.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-sst2_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-stsb.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-base_glue-stsb_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-cola_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-mnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-mrpc_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-qnli_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-qqp_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-rte_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-sst2_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/flan-t5-large_glue-stsb_lora-16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/model/flan-t5/generate_flan-t5.sh +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_generalization_exp1.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_generalization_exp2.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_mtl.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_clean.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/Seq2SeqLMPool/_template.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/automodelpool.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/nyuv2_modelpool.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/smile_mistral_exp_v1.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/smile_mistral_exp_v2.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/smile_mistral_exp_v3.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/modelpool/smile_mistral_exp_v4.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/nyuv2_config.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/nyuv2_mtl_train.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_L14.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_val.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_with_control_task.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_clean.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_corrupted.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/dummy.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/flan-t5_glue_text_generation.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench_config/taskpool/nyuv2_taskpool.yaml +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/setup.cfg +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/tests/test_depth_upscaling.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/tests/test_simple_average.py +0 -0
- {fusion_bench-0.2.3 → fusion_bench-0.2.4}/tests/test_weighed_ensemble.py +0 -0
{fusion_bench-0.2.3 → fusion_bench-0.2.4}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fusion_bench
-Version: 0.2.3
+Version: 0.2.4
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -46,6 +46,8 @@ Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
 
+<div align='center'>
+
 # FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
 
 [](http://arxiv.org/abs/2406.03280)
@@ -56,11 +58,11 @@ Requires-Dist: pytest
 [](https://github.com/psf/black)
 [](https://github.com/google/yamlfmt)
 
+</div>
 
 > [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
 
-
 ## Overview
 
 FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
@@ -198,6 +200,16 @@ fusion_bench \
   ... # other configurations
 ```
 
+### :rocket: Quick Start for Experienced Users
+
+We provide a project template for quickly starting a new fusion algorithm implementation here: [FusionBench Project Template](https://github.com/fusion-bench/fusion-bench-project-template).
+
+<div align='center'>
+
+Click on [<kbd>Use this template</kbd>](https://github.com/fusion-bench/fusion-bench-project-template/generate) to initialize new repository.
+
+</div>
+
 ### FusionBench Command Generator WebUI (for v0.1.x)
 
 FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
{fusion_bench-0.2.3 → fusion_bench-0.2.4}/README.md

@@ -1,3 +1,5 @@
+<div align='center'>
+
 # FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
 
 [](http://arxiv.org/abs/2406.03280)
@@ -8,11 +10,11 @@
 [](https://github.com/psf/black)
 [](https://github.com/google/yamlfmt)
 
+</div>
 
 > [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
 
-
 ## Overview
 
 FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
@@ -150,6 +152,16 @@ fusion_bench \
   ... # other configurations
 ```
 
+### :rocket: Quick Start for Experienced Users
+
+We provide a project template for quickly starting a new fusion algorithm implementation here: [FusionBench Project Template](https://github.com/fusion-bench/fusion-bench-project-template).
+
+<div align='center'>
+
+Click on [<kbd>Use this template</kbd>](https://github.com/fusion-bench/fusion-bench-project-template/generate) to initialize new repository.
+
+</div>
+
 ### FusionBench Command Generator WebUI (for v0.1.x)
 
 FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
{fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/__init__.py

@@ -20,6 +20,7 @@ _import_structure = {
     # model merging methods
     "linear": [
         "ExPOAlgorithm",
+        "ExPOAlgorithmForLlama",
         "SimpleAverageForLlama",
         "TaskArithmeticForLlama",
         "LinearInterpolationAlgorithm",
@@ -107,6 +108,7 @@ if TYPE_CHECKING:
         ExPOAlgorithm,
         LinearInterpolationAlgorithm,
         SimpleAverageForLlama,
+        ExPOAlgorithmForLlama,
         TaskArithmeticForLlama,
     )
     from .mixture_of_experts import (
@@ -13,9 +13,14 @@ fusion_bench \
 ```
 """
 
+import functools
 import logging
 
+from torch.utils.data import DataLoader
+
+from fusion_bench.dataset.clip_dataset import CLIPDataset
 from fusion_bench.mixins import CLIPClassificationMixin
+from fusion_bench.utils.data import InfiniteDataLoader
 
 from .layer_wise_adamerging import LayerWiseAdaMergingAlgorithm
 
@@ -31,3 +36,11 @@ class CLIPLayerWiseAdaMergingAlgorithm(
         Here we load the CLIP processor and construct the zero-shot classification head for each task.
         """
         self.setup_zero_shot_classification_head()
+
+    @functools.cache
+    def get_shuffled_test_loader_iter(self, task: str):
+        return super().get_shuffled_test_loader_iter(
+            task,
+            batch_size=self.config.batch_size,
+            num_workers=self.config.num_workers,
+        )
@@ -0,0 +1,31 @@
+import logging
+
+from fusion_bench import BaseAlgorithm, BaseModelPool
+from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_mul
+
+from .task_arithmetic import DareTaskArithmetic
+
+log = logging.getLogger(__name__)
+
+
+class DareSimpleAverage(BaseAlgorithm):
+
+    def __init__(
+        self,
+        sparsity_ratio: float,
+        only_on_linear_weights: bool,
+        rescale: bool = True,
+        **kwargs,
+    ):
+        self.sparsity_ratio = sparsity_ratio
+        self.only_on_linear_weight = only_on_linear_weights
+        self.rescale = rescale
+        super().__init__(**kwargs)
+
+    def run(self, modelpool: BaseModelPool):
+        return DareTaskArithmetic(
+            scaling_factor=1 / len(modelpool),
+            sparsity_ratio=self.sparsity_ratio,
+            only_on_linear_weights=self.only_on_linear_weight,
+            rescale=self.rescale,
+        ).run(modelpool)
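The new `DareSimpleAverage` above is a thin wrapper: it delegates to `DareTaskArithmetic` with `scaling_factor=1 / len(modelpool)`, so the dropped-and-rescaled task vectors are effectively averaged before being added back to the pretrained model. A minimal usage sketch (the constructor arguments and the `run` call come from the file above; the model pool itself is a placeholder whose construction is not shown in this diff):

```python
from fusion_bench.method.dare.simple_average import DareSimpleAverage

modelpool = ...  # placeholder: a BaseModelPool with a pretrained model and several fine-tuned models

algorithm = DareSimpleAverage(
    sparsity_ratio=0.9,            # drop 90% of each task-vector entry
    only_on_linear_weights=False,  # apply dropping to all parameters, not just nn.Linear weights
    rescale=True,                  # rescale the surviving entries (DARE)
)
merged_model = algorithm.run(modelpool)  # internally: DareTaskArithmetic with scaling_factor = 1 / len(modelpool)
```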
@@ -5,9 +5,10 @@ from fusion_bench import BaseAlgorithm, BaseModelPool
 from fusion_bench.utils.state_dict_arithmetic import state_dict_sum
 
 from .utils import (
-    module_sub_,
     module_random_drop_,
+    module_sub_,
     param_random_drop_,
+    trainable_state_dict,
 )
 
 
@@ -23,11 +24,13 @@ class DareTaskArithmetic(BaseAlgorithm):
         scaling_factor: float,
         sparsity_ratio: float,
         only_on_linear_weights: bool,
+        rescale: bool = True,
         **kwargs,
     ):
         self.scaling_factor = scaling_factor
         self.sparsity_ratio = sparsity_ratio
         self.only_on_linear_weights = only_on_linear_weights
+        self.rescale = rescale
         super().__init__(**kwargs)
 
     @torch.no_grad()
@@ -41,24 +44,28 @@ class DareTaskArithmetic(BaseAlgorithm):
             for model_name in modelpool.model_names
         }
         task_vectors = {
-            model_name: module_sub_(finetuned_models, pretrained_model)
+            model_name: module_sub_(finetuned_models[model_name], pretrained_model)
            for model_name in finetuned_models
         }
         del finetuned_models
 
         # drop and rescale task vectors
-        for tv in task_vectors.
+        for model_name, tv in task_vectors.items():
             if self.only_on_linear_weights:
-                for module in tv.
+                for module_name, module in tv.named_modules():
                     if isinstance(module, nn.Linear):
+                        print(f"pruning model: `{model_name}`, layer: {module_name}.")
                         param_random_drop_(
-                            module.weight, self.sparsity_ratio, rescale=
+                            module.weight, self.sparsity_ratio, rescale=self.rescale
                        )
             else:
-
+                print(f"pruning model: `{model_name}`")
+                module_random_drop_(tv, self.sparsity_ratio, rescale=self.rescale)
 
         # merge task vectors
-        task_vector_sum = state_dict_sum(
+        task_vector_sum = state_dict_sum(
+            [trainable_state_dict(tv) for tv in task_vectors.values()]
+        )
 
         # scale the task vector and add it to the pretrained model
         for name, delta in task_vector_sum.items():
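For reference, the `rescale` flag threaded through `param_random_drop_` and `module_random_drop_` above corresponds to the standard DARE drop-and-rescale step: each task-vector entry is zeroed with probability `sparsity_ratio`, and the survivors are divided by `1 - sparsity_ratio` so the task vector is preserved in expectation. A single-tensor sketch of that operation (this is the generic DARE recipe, not the actual `param_random_drop_` implementation, which is not part of this diff):

```python
import torch


def dare_drop_and_rescale(delta: torch.Tensor, sparsity_ratio: float, rescale: bool = True) -> torch.Tensor:
    """Randomly zero entries of a task vector; optionally rescale the survivors (DARE)."""
    keep_prob = 1.0 - sparsity_ratio
    mask = torch.bernoulli(torch.full_like(delta, keep_prob))  # 1 = keep, 0 = drop
    dropped = delta * mask
    if rescale:
        dropped = dropped / keep_prob  # unbiased in expectation: E[dropped] == delta
    return dropped


# With sparsity_ratio=0.9, roughly 10% of the entries survive and are scaled by 10x.
delta = torch.randn(4, 4)
print(dare_drop_and_rescale(delta, sparsity_ratio=0.9))
```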
@@ -1,5 +1,6 @@
 # flake8: noqa F401
+from .expo import ExPOAlgorithm
 from .linear_interpolation import LinearInterpolationAlgorithm
+from .llama_expo import ExPOAlgorithmForLlama
 from .simple_average_for_llama import SimpleAverageForLlama
 from .task_arithmetic_for_llama import TaskArithmeticForLlama
-from .expo import ExPOAlgorithm
@@ -1,5 +1,5 @@
 """
-This module contains the implementation of ExPO merge.
+This module contains the implementation of ExPO merge for general nn.Modules.
 
 Reference:
 - Zheng et al. Weak-to-Strong Extrapolation Expedites Alignment.
@@ -75,5 +75,5 @@ class ExPOAlgorithm(BaseAlgorithm):
             state_dict_mul(delta_parameters, scalar=self.extrapolation_factor),
         )
 
-
-        return
+        rlhf_model.load_state_dict(merged_sd)
+        return rlhf_model
@@ -0,0 +1,229 @@
+"""
+This module contains the implementation of ExPO merge for LLAMA models.
+
+Reference:
+- Zheng et al. Weak-to-Strong Extrapolation Expedites Alignment.
+"""
+
+import logging
+from typing import Optional, cast
+
+import torch
+from torch import nn
+from transformers import LlamaForCausalLM, LlamaModel
+from typing_extensions import override
+
+from fusion_bench import BaseAlgorithm, BaseModelPool
+from fusion_bench.method import DareSimpleAverage, SimpleAverageAlgorithm
+from fusion_bench.method.pruning.prune_utils import unstructured_magnitude_prune_
+from fusion_bench.utils.state_dict_arithmetic import StateDictType
+
+log = logging.getLogger(__name__)
+
+
+def expo_(
+    sft_model: nn.Module,
+    rlhf_model: nn.Module,
+    extrapolation_factor: float,
+    merge_dtype: Optional[torch.dtype] = None,
+    magnitude_sparsity_ratio: Optional[float] = None,
+):
+    """
+    Applies extrapolation to the parameters of the RLHF model based on the SFT model.
+    The RLHF model is updated in place.
+
+    Args:
+        sft_model (nn.Module): The supervised fine-tuned model.
+        rlhf_model (nn.Module): The reinforcement learning from human feedback model.
+        extrapolation_factor (float): The factor by which to extrapolate the parameters.
+
+    Returns:
+        nn.Module: The RLHF model with updated parameters.
+    """
+    rlhf_state_dict: StateDictType = rlhf_model.state_dict()
+    sft_state_dict: StateDictType = sft_model.state_dict()
+
+    merged_state_dict = {}
+
+    for n in rlhf_state_dict:
+        rlhf_p = rlhf_state_dict[n]
+        sft_p = sft_state_dict[n]
+        if merge_dtype is not None:
+            orignal_dtype = rlhf_state_dict[n].dtype
+            rlhf_p = rlhf_state_dict[n].to(dtype=merge_dtype)
+            sft_p = sft_state_dict[n].to(dtype=merge_dtype)
+
+        delta_p = rlhf_p - sft_p
+        if magnitude_sparsity_ratio is not None:
+            delta_p = unstructured_magnitude_prune_(
+                delta_p, torch.abs, magnitude_sparsity_ratio, return_pruned_weight=False
+            )
+        rlhf_state_dict[n] = rlhf_p + extrapolation_factor * delta_p
+
+        if merge_dtype is not None:
+            merged_state_dict[n] = rlhf_p.to(dtype=orignal_dtype)
+        else:
+            merged_state_dict[n] = rlhf_p
+
+    rlhf_model.load_state_dict(merged_state_dict)
+    return rlhf_model
+
+
+def expo_linear_modules_(
+    sft_model: nn.Module,
+    rlhf_model: nn.Module,
+    extrapolation_factor: float,
+    merge_dtype: Optional[torch.dtype] = None,
+    magnitude_sparsity_ratio: Optional[float] = None,
+):
+    """
+    Applies extrapolation to the linear modules of the RLHF model based on the SFT model.
+    The RLHF model is updated in place.
+
+    Args:
+        sft_model (nn.Module): The supervised fine-tuned model.
+        rlhf_model (nn.Module): The reinforcement learning from human feedback model.
+        extrapolation_factor (float): The factor by which to extrapolate the parameters.
+
+    Returns:
+        nn.Module: The RLHF model with updated linear modules.
+    """
+    for name, module in sft_model.named_modules():
+        if isinstance(module, nn.Linear):
+            expo_(
+                module,
+                rlhf_model.get_submodule(name),
+                extrapolation_factor=extrapolation_factor,
+                merge_dtype=merge_dtype,
+                magnitude_sparsity_ratio=magnitude_sparsity_ratio,
+            )
+    return rlhf_model
+
+
+class ExPOAlgorithmForLlama(BaseAlgorithm):
+
+    def __init__(
+        self,
+        extrapolation_factor: float,
+        attention_scaling_factor: float = 0.5,
+        only_on_backbone: bool = True,
+        on_linear_weights: bool = True,
+        on_linear_bias: bool = False,
+        on_embedding: bool = False,
+        fix_last_n_layers: int = 0,
+        fix_first_n_layers: int = 0,
+        magnitude_sparsity_ratio: Optional[float] = None,
+        **kwargs,
+    ):
+        self.extrapolation_factor = extrapolation_factor
+        self.attention_scaling_factor = attention_scaling_factor
+        self.only_on_backbone = only_on_backbone
+        self.on_linear_weights = on_linear_weights
+        self.on_linear_bias = on_linear_bias
+        self.on_embedding = on_embedding
+        self.fix_last_n_layers = fix_last_n_layers
+        self.fix_first_n_layers = fix_first_n_layers
+        self.magnitude_sparsity_ratio = magnitude_sparsity_ratio
+        super().__init__(**kwargs)
+
+    def load_models(self, modelpool: BaseModelPool):
+        sft_model: LlamaForCausalLM = modelpool.load_pretrained_model()
+        if len(modelpool) == 1:
+            rlhf_model = modelpool.load_model(modelpool.model_names[0])
+        else:
+            # if there are multiple RLHF models, use simple average to merge them before running ExPO
+            log.info(
+                f"There are {len(modelpool)} models in the model pool, averaging them first..."
+            )
+            rlhf_model = SimpleAverageAlgorithm().run(modelpool)
+        rlhf_model = cast(LlamaForCausalLM, rlhf_model)
+        return sft_model, rlhf_model
+
+    def run(self, modelpool: BaseModelPool):
+        if not isinstance(modelpool, BaseModelPool):
+            modelpool = BaseModelPool(modelpool)
+
+        assert len(modelpool.model_names) >= 1, "ExPO requires at least one model."
+        assert modelpool.has_pretrained, "ExPO requires pretrained models (base model)."
+
+        sft_model, rlhf_model = self.load_models(modelpool)
+
+        if not self.on_linear_bias:
+            for name, module in sft_model.named_modules():
+                if isinstance(module, nn.Linear):
+                    module.bias = rlhf_model.get_submodule(name).bias
+        if not self.on_linear_weights:
+            for name, module in sft_model.named_modules():
+                if isinstance(module, nn.Linear):
+                    module.weight = rlhf_model.get_submodule(name).weight
+
+        if not self.only_on_backbone:
+            expo_(sft_model.lm_head, rlhf_model.lm_head, self.extrapolation_factor)
+
+        # expo on the backbone
+        self._expo_lm_model_(
+            sft_model.model, rlhf_model.model, self.extrapolation_factor
+        )
+        return rlhf_model
+
+    def _expo_lm_model_(
+        self,
+        sft_model: LlamaModel,
+        rlhf_model: LlamaModel,
+        extrapolation_factor: float,
+    ):
+        if self.on_embedding:
+            expo_(sft_model.embed_tokens, rlhf_model.embed_tokens, extrapolation_factor)
+
+        if self.fix_first_n_layers == "half":
+            self.fix_first_n_layers = len(sft_model.layers) // 2
+        if self.fix_last_n_layers == "half":
+            self.fix_last_n_layers = len(sft_model.layers) // 2
+
+        for layer_idx in range(
+            self.fix_first_n_layers, len(sft_model.layers) - self.fix_last_n_layers
+        ):
+            sft_layer = sft_model.layers[layer_idx]
+            expo_linear_modules_(
+                sft_layer.self_attn,
+                rlhf_model.layers[layer_idx].self_attn,
+                extrapolation_factor=extrapolation_factor
+                * self.attention_scaling_factor,
+                merge_dtype=torch.float32,
+                magnitude_sparsity_ratio=self.magnitude_sparsity_ratio,
+            )
+            expo_linear_modules_(
+                sft_layer.mlp,
+                rlhf_model.layers[layer_idx].mlp,
+                extrapolation_factor=extrapolation_factor,
+                merge_dtype=torch.float32,
+                magnitude_sparsity_ratio=self.magnitude_sparsity_ratio,
+            )
+
+
+class ExPOWithDareForLLama(ExPOAlgorithmForLlama):
+    def __init__(
+        self,
+        dare_sparsity_ratio: float,
+        dare_only_on_linear_weights: bool,
+        dare_rescale: bool = True,
+        **kwargs,
+    ):
+        self.dare_sparsity_ratio = dare_sparsity_ratio
+        self.dare_only_on_linear_weights = dare_only_on_linear_weights
+        self.dare_rescale = dare_rescale
+        super().__init__(**kwargs)
+
+    @override
+    def load_models(self, modelpool: BaseModelPool):
+        log.info(
+            f"There are {len(modelpool)} models in the model pool, averaging them first..."
+        )
+        rlhf_model = DareSimpleAverage(
+            sparsity_ratio=self.dare_sparsity_ratio,
+            only_on_linear_weights=self.dare_only_on_linear_weights,
+            rescale=self.dare_rescale,
+        ).run(modelpool)
+        rlhf_model = cast(LlamaForCausalLM, rlhf_model)
+        sft_model: LlamaForCausalLM = modelpool.load_pretrained_model()
+        return sft_model, rlhf_model
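Putting the file above together: `ExPOAlgorithmForLlama.run` uses the pretrained (SFT) checkpoint as the anchor, averages the aligned checkpoints when the pool contains more than one, and then extrapolates the per-layer linear modules, scaling the attention deltas by `attention_scaling_factor`. A minimal usage sketch (the constructor arguments and the `run` call are taken from the file above; the model pool is a placeholder whose construction is not shown in this diff):

```python
from fusion_bench.method.linear.llama_expo import ExPOAlgorithmForLlama

modelpool = ...  # placeholder: a BaseModelPool with a pretrained base (SFT) model and one or more aligned models

algorithm = ExPOAlgorithmForLlama(
    extrapolation_factor=0.1,       # alpha in the extrapolation update
    attention_scaling_factor=0.5,   # attention deltas are extrapolated at half strength
    only_on_backbone=True,          # leave lm_head untouched
    magnitude_sparsity_ratio=None,  # no magnitude pruning of the deltas
)
extrapolated_model = algorithm.run(modelpool)
```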
{fusion_bench-0.2.3 → fusion_bench-0.2.4}/fusion_bench/method/pruning/magnitude_diff_pruning.py
RENAMED
@@ -1,7 +1,7 @@
 import logging
 import re
 from copy import deepcopy
-from typing import Dict, List  # noqa: F401
+from typing import Dict, List, Optional, Union  # noqa: F401
 
 import torch
 from torch import Tensor, nn
@@ -75,6 +75,7 @@ class MagnitudeDiffPruningAlgorithm(
     def __init__(
         self,
         prune_ratio: float,
+        rescale: Optional[Union[bool, float]] = None,
         extract_names: List[str] = None,
         **kwargs,
     ):
@@ -87,6 +88,7 @@ class MagnitudeDiffPruningAlgorithm(
             **kwargs: Additional keyword arguments.
         """
         self.prune_ratio = prune_ratio
+        self.rescale = rescale
         self.extract_names = extract_names
         super().__init__(**kwargs)
 
@@ -121,6 +123,7 @@ class MagnitudeDiffPruningAlgorithm(
         self.print_profile_summary()
         return model
 
+    @torch.no_grad()
     def magnitude_prune(
         self,
         pretrained_model: nn.Module,
@@ -171,6 +174,8 @@ class MagnitudeDiffPruningAlgorithm(
             if _is_name_matched(name, extract_names):
                 w_diff = ft_state_dict[name] - param
                 w_diff = _magnitude_prune(w_diff, prune_ratio=self.prune_ratio)
+                if self.rescale is not None and self.rescale:
+                    w_diff = w_diff * self.rescale
                 param.data = param + w_diff
 
         return model
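The `rescale` option added above simply multiplies the magnitude-pruned weight difference before it is added back onto the pretrained parameter. A one-tensor sketch of that inner step (the `_magnitude_prune` helper is not shown in this diff, so the top-k masking here is an illustrative stand-in for it):

```python
from typing import Optional, Union

import torch


def magnitude_diff_prune_step(
    w_pre: torch.Tensor,
    w_ft: torch.Tensor,
    prune_ratio: float,
    rescale: Optional[Union[bool, float]] = None,
) -> torch.Tensor:
    """Keep only the largest-magnitude entries of (w_ft - w_pre), optionally rescale, then add back."""
    w_diff = (w_ft - w_pre).flatten()
    num_keep = int(w_diff.numel() * (1.0 - prune_ratio))
    mask = torch.zeros_like(w_diff)
    if num_keep > 0:
        _, idx = w_diff.abs().topk(num_keep)  # indices of the largest-magnitude differences
        mask[idx] = 1.0
    w_diff = (w_diff * mask).view_as(w_pre)
    if rescale is not None and rescale:  # mirrors the added rescale branch above
        w_diff = w_diff * rescale
    return w_pre + w_diff
```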
@@ -51,10 +51,46 @@ class CLIPClassificationMixin(LightningFabricMixin):
         return self._clip_processor
 
     @functools.cache
-    def get_shuffled_test_loader_iter(
+    def get_shuffled_test_loader_iter(
+        self,
+        task: str,
+        batch_size: Optional[int] = None,
+        num_workers: Optional[int] = None,
+        **loader_kwargs,
+    ):
+        """
+        Get an iterator for a shuffled test DataLoader.
+
+        This method creates a DataLoader for the test dataset of the specified task,
+        with shuffling enabled. It allows for optional customization of batch size,
+        number of workers, and other DataLoader keyword arguments.
+
+        Args:
+            task (str): The task identifier for which the test dataset is to be loaded.
+            batch_size (Optional[int]): The batch size to use for the DataLoader. If None, the default batch size is used.
+            num_workers (Optional[int]): The number of worker processes to use for data loading. If None, the default number of workers is used.
+            **loader_kwargs: Additional keyword arguments to pass to the DataLoader.
+
+        Returns:
+            Iterator: An iterator over the shuffled test DataLoader.
+        """
+        # get dataloader kwargs
+        dataloader_kwargs = self._dataloader_kwargs.copy()
+        dataloader_kwargs["shuffle"] = True
+        if batch_size is not None:
+            dataloader_kwargs["batch_size"] = batch_size
+        if num_workers is not None:
+            dataloader_kwargs["num_workers"] = num_workers
+        dataloader_kwargs.update(loader_kwargs)
+
+        # get the test dataset
+        clip_dataset = CLIPDataset(
+            self.modelpool.load_test_dataset(task), self.clip_processor
+        )
+        # create the dataloader
         loader = DataLoader(
-
-            **
+            clip_dataset,
+            **dataloader_kwargs,
         )
         loader = self.fabric.setup_dataloaders(loader)
         return iter(InfiniteDataLoader(loader))
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fusion_bench
-Version: 0.2.3
+Version: 0.2.4
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -46,6 +46,8 @@ Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
 
+<div align='center'>
+
 # FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
 
 [](http://arxiv.org/abs/2406.03280)
@@ -56,11 +58,11 @@ Requires-Dist: pytest
 [](https://github.com/psf/black)
 [](https://github.com/google/yamlfmt)
 
+</div>
 
 > [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
 
-
 ## Overview
 
 FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
@@ -198,6 +200,16 @@ fusion_bench \
     ... # other configurations
 ```
 
+### :rocket: Quick Start for Experienced Users
+
+We provide a project template for quickly starting a new fusion algorithm implementation here: [FusionBench Project Template](https://github.com/fusion-bench/fusion-bench-project-template).
+
+<div align='center'>
+
+Click on [<kbd>Use this template</kbd>](https://github.com/fusion-bench/fusion-bench-project-template/generate) to initialize new repository.
+
+</div>
+
 ### FusionBench Command Generator WebUI (for v0.1.x)
 
 FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
@@ -100,6 +100,8 @@ fusion_bench/../fusion_bench_config/method/fisher_merging/fisher_merging.yaml
 fusion_bench/../fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml
 fusion_bench/../fusion_bench_config/method/linear/expo.yaml
 fusion_bench/../fusion_bench_config/method/linear/linear_interpolation.yaml
+fusion_bench/../fusion_bench_config/method/linear/llama_expo.yaml
+fusion_bench/../fusion_bench_config/method/linear/llama_expo_with_dare.yaml
 fusion_bench/../fusion_bench_config/method/linear/simple_average_for_llama.yaml
 fusion_bench/../fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml
 fusion_bench/../fusion_bench_config/method/linear/weighted_average.yaml
@@ -107,6 +109,7 @@ fusion_bench/../fusion_bench_config/method/linear/weighted_average_for_llama.yam
 fusion_bench/../fusion_bench_config/method/pruning/llama_magnitude_pruning.yaml
 fusion_bench/../fusion_bench_config/method/pruning/llama_random_pruning.yaml
 fusion_bench/../fusion_bench_config/method/pruning/llama_wanda_pruning.yaml
+fusion_bench/../fusion_bench_config/method/pruning/magnitude_diff_pruning.yaml
 fusion_bench/../fusion_bench_config/method/regmean/clip_regmean.yaml
 fusion_bench/../fusion_bench_config/method/regmean/gpt2_regmean.yaml
 fusion_bench/../fusion_bench_config/method/regmean/regmean.yaml
@@ -293,6 +296,7 @@ fusion_bench/method/fisher_merging/gpt2_fisher_merging.py
 fusion_bench/method/linear/__init__.py
 fusion_bench/method/linear/expo.py
 fusion_bench/method/linear/linear_interpolation.py
+fusion_bench/method/linear/llama_expo.py
 fusion_bench/method/linear/simple_average_for_llama.py
 fusion_bench/method/linear/task_arithmetic_for_llama.py
 fusion_bench/method/lm_finetune/__init__.py
@@ -0,0 +1,19 @@
+# This algorithm merges a pretrained model with a finetuned model.
+#
+# $$\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$$
+#
+# where $\theta_{merged}$ is the merged model, $\theta_{ft}$ is the finetuned model (medium-aligned model),
+# $\theta_{pre}$ is the pretrained model (base model), and $\alpha$ is the extrapolation factor.
+_target_: fusion_bench.method.ExPOAlgorithmForLlama
+extrapolation_factor: 0.1
+attention_scaling_factor: 1.0
+
+only_on_backbone: true
+on_linear_weights: true
+on_linear_bias: false
+on_embedding: false
+
+fix_last_n_layers: 0
+fix_first_n_layers: 0
+
+magnitude_sparsity_ratio: null
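The comment block in this config states the ExPO update, $$\theta_{merged} = \theta_{ft} + \alpha (\theta_{ft} - \theta_{pre})$$, with $\alpha$ given by `extrapolation_factor` (0.1 in this config). A tiny worked example on a single tensor (the numeric values are illustrative only):

```python
import torch

alpha = 0.1  # extrapolation_factor from this config
theta_pre = torch.tensor([1.0, 2.0, 3.0])  # pretrained (base) weights
theta_ft = torch.tensor([1.2, 1.8, 3.5])   # finetuned (medium-aligned) weights

theta_merged = theta_ft + alpha * (theta_ft - theta_pre)
print(theta_merged)  # tensor([1.2200, 1.7800, 3.5500])
```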