fusion-bench 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/method/__init__.py +4 -0
- fusion_bench/method/adamerging/flan_t5_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/gpt2_layer_wise_adamerging.py +1 -1
- fusion_bench/method/base_algorithm.py +1 -0
- fusion_bench/method/dawe/dawe_for_clip.py +1 -1
- fusion_bench/method/depth_upscaling/depth_upscaling_for_llama.py +3 -2
- fusion_bench/method/fw_merging/__init__.py +2 -0
- fusion_bench/method/fw_merging/fw_hard.py +448 -0
- fusion_bench/method/fw_merging/fw_soft.py +519 -0
- fusion_bench/method/fw_merging/utils.py +331 -0
- fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +1 -1
- fusion_bench/method/moe_pruner/__init__.py +7 -0
- fusion_bench/method/moe_pruner/hooks/__init__.py +6 -0
- fusion_bench/method/moe_pruner/hooks/deepseek_v2.py +85 -0
- fusion_bench/method/moe_pruner/hooks/hook.py +23 -0
- fusion_bench/method/moe_pruner/hooks/mixtral.py +93 -0
- fusion_bench/method/moe_pruner/moe_pruner.py +304 -0
- fusion_bench/method/moe_pruner/utils/__init__.py +1 -0
- fusion_bench/method/moe_pruner/utils/data.py +154 -0
- fusion_bench/method/moe_pruner/utils/layerwrapper.py +61 -0
- fusion_bench/method/moe_pruner/utils/prune.py +313 -0
- fusion_bench/method/moe_pruner/utils/score.py +41 -0
- fusion_bench/method/pruning/__init__.py +1 -0
- fusion_bench/method/pruning/llama_sparsegpt_prune.py +223 -0
- fusion_bench/method/pruning/sparsegpt_utils/__init__.py +1 -0
- fusion_bench/method/pruning/sparsegpt_utils/sparsegpt.py +128 -0
- fusion_bench/method/pruning/wanda_utils/data.py +33 -14
- fusion_bench/method/pwe_moe/module.py +2 -7
- fusion_bench/method/randes/__init__.py +15 -0
- fusion_bench/method/randes/base_algorithm.py +1013 -0
- fusion_bench/method/randes/modelsoup.py +126 -0
- fusion_bench/method/randes/task_arithmetic.py +318 -0
- fusion_bench/method/simple_average.py +3 -2
- fusion_bench/method/sparselo/sparselo.py +20 -2
- fusion_bench/method/tall_mask/__init__.py +1 -0
- fusion_bench/method/tall_mask/task_arithmetic.py +133 -0
- fusion_bench/method/task_singular_vector/TSVM.py +238 -25
- fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +52 -20
- fusion_bench/mixins/hydra_config.py +1 -1
- fusion_bench/mixins/lightning_fabric.py +25 -1
- fusion_bench/mixins/serialization.py +18 -2
- fusion_bench/modelpool/base_pool.py +1 -0
- fusion_bench/modelpool/clip_vision/modelpool.py +21 -13
- fusion_bench/modelpool/lazy_state_dict_pool.py +15 -0
- fusion_bench/models/modeling_deepseek_v2/__init__.py +15 -0
- fusion_bench/models/modeling_deepseek_v2/configuration_deepseek.py +208 -0
- fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +1922 -0
- fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +38 -0
- fusion_bench/models/parameter_dict.py +6 -1
- fusion_bench/programs/fabric_fusion_program.py +14 -5
- fusion_bench/taskpool/base_pool.py +1 -0
- fusion_bench/taskpool/clip_vision/taskpool.py +8 -1
- fusion_bench/taskpool/dummy.py +6 -4
- fusion_bench/utils/__init__.py +2 -1
- fusion_bench/utils/data.py +1 -1
- fusion_bench/utils/{instantiate.py → instantiate_utils.py} +3 -0
- fusion_bench/utils/lazy_state_dict.py +268 -0
- fusion_bench/utils/parameters.py +33 -0
- fusion_bench/utils/pylogger.py +28 -0
- fusion_bench/utils/state_dict_arithmetic.py +74 -2
- fusion_bench/utils/type.py +1 -0
- {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/METADATA +8 -2
- {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/RECORD +104 -44
- {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/WHEEL +1 -1
- fusion_bench_config/dataset/image_classification/test/TALL10.yaml +28 -0
- fusion_bench_config/dataset/image_classification/test/TALL12.yaml +28 -0
- fusion_bench_config/dataset/image_classification/test/TALL16.yaml +28 -0
- fusion_bench_config/dataset/image_classification/test/TALL18.yaml +28 -0
- fusion_bench_config/dataset/image_classification/train/TALL10.yaml +28 -0
- fusion_bench_config/dataset/image_classification/train/TALL12.yaml +28 -0
- fusion_bench_config/dataset/image_classification/train/TALL16.yaml +28 -0
- fusion_bench_config/dataset/image_classification/train/TALL18.yaml +28 -0
- fusion_bench_config/fabric_model_fusion.yaml +2 -2
- fusion_bench_config/method/fw_merging/fw_hard.yaml +11 -0
- fusion_bench_config/method/fw_merging/fw_soft.yaml +12 -0
- fusion_bench_config/method/moe_pruner/moe_pruner.yaml +15 -0
- fusion_bench_config/method/pruning/llama_sparsegpt_pruning.yaml +16 -0
- fusion_bench_config/method/randes/superposed_model_soup.yaml +18 -0
- fusion_bench_config/method/randes/superposed_task_arithmetic.yaml +20 -0
- fusion_bench_config/method/randes/superposed_task_arithmetic_lora.yaml +20 -0
- fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +2 -1
- fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
- fusion_bench_config/method/tall_mask/task_arithmetic.yaml +4 -0
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +2 -1
- fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL10.yaml +29 -0
- fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL12.yaml +29 -0
- fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL16.yaml +29 -0
- fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL18.yaml +29 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +8 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +8 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +8 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +8 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_cars_and_dtd.yaml +16 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +16 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +16 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +19 -0
- fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml +15 -0
- fusion_bench_config/modelpool/CausalLMPool/mixtral-8x7b.yaml +14 -0
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/roberta-base_glue.yaml +69 -0
- fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml +0 -1
- {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/top_level.txt +0 -0
fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py
ADDED

@@ -0,0 +1,38 @@
+from typing import List, Optional, Union
+
+
+from transformers.models.llama import LlamaTokenizerFast
+
+
+class DeepseekTokenizerFast(LlamaTokenizerFast):
+
+    def convert_ids_to_tokens(
+        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
+    ) -> Union[str, List[str]]:
+        """
+        Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
+        added tokens.
+
+        Args:
+            ids (`int` or `List[int]`):
+                The token id (or token ids) to convert to tokens.
+            skip_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not to remove special tokens in the decoding.
+
+        Returns:
+            `str` or `List[str]`: The decoded token(s).
+        """
+        if isinstance(ids, int):
+            return self._convert_id_to_token(ids)
+        tokens = []
+        for index in ids:
+            index = int(index)
+            if skip_special_tokens and index in self.all_special_ids:
+                continue
+            token = self._tokenizer.id_to_token(index)
+            tokens.append(token if token is not None else "")
+        return tokens
+
+    def _convert_id_to_token(self, index: int) -> Optional[str]:
+        token = self._tokenizer.id_to_token(int(index))
+        return token if token is not None else ""
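The notable behavior of the new `convert_ids_to_tokens` is that out-of-vocabulary ids become empty strings instead of raising. A minimal standalone sketch of that logic, with a made-up toy vocab and special-id set in place of the HF tokenizer backend:

```python
# Sketch of the convert_ids_to_tokens logic above; TOY_VOCAB and
# SPECIAL_IDS are hypothetical stand-ins for the tokenizer state.
from typing import List, Optional, Union

TOY_VOCAB = {0: "<s>", 1: "</s>", 2: "hello", 3: "world"}
SPECIAL_IDS = {0, 1}

def id_to_token(index: int) -> Optional[str]:
    return TOY_VOCAB.get(int(index))

def convert_ids_to_tokens(ids, skip_special_tokens: bool = False):
    if isinstance(ids, int):
        # unknown single id maps to "" rather than raising
        return id_to_token(ids) or ""
    tokens = []
    for index in ids:
        index = int(index)
        if skip_special_tokens and index in SPECIAL_IDS:
            continue
        token = id_to_token(index)
        tokens.append(token if token is not None else "")
    return tokens

print(convert_ids_to_tokens([0, 2, 3, 1], skip_special_tokens=True))
```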
fusion_bench/models/parameter_dict.py
CHANGED

@@ -66,7 +66,9 @@ class ParameterDictModel(nn.Module):
         super().__init__()
         if parameters is not None:
             for name, param in parameters.items():
-                assert isinstance(
+                assert isinstance(
+                    param, (nn.Parameter, nn.Buffer)
+                ), f"{name} is not a nn.Parameter or nn.Buffer"
                 _set_attr(
                     self,
                     name.split("."),

@@ -114,3 +116,6 @@ class ParameterDictModel(nn.Module):
 
     def values(self) -> List[nn.Parameter]:
         return [self[name] for name in self.keys()]
+
+    def __len__(self):
+        return len(self.keys())
fusion_bench/programs/fabric_fusion_program.py
CHANGED

@@ -9,7 +9,7 @@ from omegaconf import DictConfig, OmegaConf
 from torch import nn
 from tqdm.auto import tqdm
 
-import fusion_bench.utils.
+import fusion_bench.utils.instantiate_utils
 from fusion_bench.method import BaseAlgorithm
 from fusion_bench.mixins import LightningFabricMixin
 from fusion_bench.modelpool import BaseModelPool

@@ -19,8 +19,9 @@ from fusion_bench.utils import import_object, instantiate, timeit_context
 from fusion_bench.utils.hydra_utils import get_hydra_output_dir
 from fusion_bench.utils.json import print_json
 from fusion_bench.utils.rich_utils import print_bordered, print_config_tree
+from fusion_bench.utils.pylogger import getRankZeroLogger
 
-log =
+log = getRankZeroLogger(__name__)
 
 
 class FabricModelFusionProgram(

@@ -66,8 +67,8 @@ class FabricModelFusionProgram(
         self.merged_model_save_kwargs = merged_model_save_kwargs
         self.fast_dev_run = fast_dev_run
         self.seed = seed
+        fusion_bench.utils.instantiate_utils.PRINT_FUNCTION_CALL = print_function_call
         super().__init__(**kwargs)
-        fusion_bench.utils.instantiate.PRINT_FUNCTION_CALL = print_function_call
 
         if print_config:
             print_config_tree(

@@ -196,6 +197,11 @@ class FabricModelFusionProgram(
         for key, item in merged_model.items():
             if isinstance(item, nn.Module):
                 report[key] = taskpool.evaluate(item, *args, **kwargs)
+            elif key == "models":
+                # for multi-model evaluation
+                report[key] = self.evaluate_merged_model(
+                    taskpool, item, *args, **kwargs
+                )
             else:
                 # metadata
                 report[key] = item

@@ -247,13 +253,16 @@ class FabricModelFusionProgram(
         if self.taskpool is not None:
             report = self.evaluate_merged_model(self.taskpool, merged_model)
             try:
-
+                if rank_zero_only.rank == 0:
+                    print_json(report, print_type=False)
             except Exception as e:
                 log.warning(f"Failed to pretty print the report: {e}")
-
+                log.info(report)
             if self.report_save_path is not None:
                 # save report (Dict) to a file
                 # if the directory of `save_report` does not exists, create it
+                if "{log_dir}" in self.report_save_path and self.log_dir is not None:
+                    self.report_save_path = self.report_save_path.format(log_dir=self.log_dir)
                 os.makedirs(os.path.dirname(self.report_save_path), exist_ok=True)
                 json.dump(report, open(self.report_save_path, "w"))
             else:
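The `{log_dir}` placeholder substitution added to `report_save_path` handling can be sketched in isolation (the helper name and values here are illustrative, not part of the package):

```python
# Sketch of the conditional "{log_dir}" substitution above: the
# placeholder is only expanded when it is present and a log dir exists.
def resolve_report_save_path(report_save_path: str, log_dir):
    if "{log_dir}" in report_save_path and log_dir is not None:
        report_save_path = report_save_path.format(log_dir=log_dir)
    return report_save_path

print(resolve_report_save_path("{log_dir}/report.json", "outputs/run1"))
```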
fusion_bench/taskpool/clip_vision/taskpool.py
CHANGED

@@ -348,8 +348,15 @@ class CLIPVisionModelTaskPool(
 
         log.info(f"Evaluation Result: {report}")
         if self.fabric.is_global_zero and len(self.fabric._loggers) > 0:
-
+            save_path = os.path.join(self.log_dir, "report.json")
+            for version in itertools.count(1):
+                if not os.path.exists(save_path):
+                    break
+                # if the file already exists, increment the version to avoid overwriting
+                save_path = os.path.join(self.log_dir, f"report_{version}.json")
+            with open(save_path, "w") as fp:
                json.dump(report, fp)
+            log.info(f"Evaluation report saved to {save_path}")
         return report
 
     def on_task_evaluation_begin(self, classifier: HFCLIPClassifier, task_name: str):
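The report-versioning loop added above picks `report.json`, then `report_1.json`, `report_2.json`, and so on until a free name is found, so earlier evaluation reports are never overwritten. It can be exercised standalone (the helper name `next_report_path` is ours, not the package's):

```python
# Standalone sketch of the itertools.count versioning loop above.
import itertools
import os
import tempfile

def next_report_path(log_dir: str) -> str:
    save_path = os.path.join(log_dir, "report.json")
    for version in itertools.count(1):
        if not os.path.exists(save_path):
            break
        # if the file already exists, increment the version to avoid overwriting
        save_path = os.path.join(log_dir, f"report_{version}.json")
    return save_path

with tempfile.TemporaryDirectory() as d:
    for _ in range(3):
        path = next_report_path(d)
        open(path, "w").close()  # simulate writing a report
    print(sorted(os.listdir(d)))
```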
fusion_bench/taskpool/dummy.py
CHANGED

@@ -10,6 +10,7 @@ from fusion_bench.models.separate_io import separate_save
 from fusion_bench.taskpool.base_pool import BaseTaskPool
 from fusion_bench.utils import timeit_context
 from fusion_bench.utils.parameters import count_parameters, print_parameters
+from lightning.pytorch.utilities import rank_zero_only
 
 
 def get_model_summary(model: nn.Module) -> dict:

@@ -49,10 +50,11 @@ class DummyTaskPool(BaseTaskPool):
         Args:
             model: The model to evaluate.
         """
-
+        if rank_zero_only.rank == 0:
+            print_parameters(model, is_human_readable=True)
 
-
-
-
+        if self.model_save_path is not None:
+            with timeit_context(f"Saving the model to {self.model_save_path}"):
+                separate_save(model, self.model_save_path)
 
         return get_model_summary(model)
fusion_bench/utils/__init__.py
CHANGED

@@ -7,8 +7,9 @@ from .cache_utils import *
 from .devices import *
 from .dtype import parse_dtype
 from .fabric import seed_everything_by_time
-from .
+from .instantiate_utils import instantiate, is_instantiable
 from .misc import *
 from .packages import import_object
 from .parameters import *
 from .timer import timeit_context
+from .lazy_state_dict import LazyStateDict
fusion_bench/utils/data.py
CHANGED

@@ -96,7 +96,7 @@ def train_validation_split(
 
     # Compute the number of samples for training and validation
     num_samples = len(dataset)
-    if validation_size is
+    if validation_size is None:
        assert (
            0 < validation_fraction < 1
        ), "Validation fraction must be between 0 and 1"
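The `validation_size is None` branch derives the split from `validation_fraction`. A standalone sketch of the size computation, under the assumption that the fractional size is truncated with `int(...)` (the names mirror the function above; the rounding rule is our guess, not confirmed by the diff):

```python
# Sketch of the train/validation size computation in
# train_validation_split; int(...) truncation is an assumption.
def split_sizes(num_samples, validation_fraction=None, validation_size=None):
    if validation_size is None:
        assert 0 < validation_fraction < 1, "Validation fraction must be between 0 and 1"
        validation_size = int(num_samples * validation_fraction)
    train_size = num_samples - validation_size
    return train_size, validation_size

print(split_sizes(100, validation_fraction=0.2))
```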
fusion_bench/utils/instantiate_utils.py (renamed from fusion_bench/utils/instantiate.py)
CHANGED

@@ -41,6 +41,9 @@ def set_print_function_call(value: bool):
     finally:
         PRINT_FUNCTION_CALL = old_value
 
+def set_print_function_call_permeanent(value: bool):
+    global PRINT_FUNCTION_CALL
+    PRINT_FUNCTION_CALL = value
 
 def is_instantiable(config: Union[DictConfig, Any]) -> bool:
     if OmegaConf.is_dict(config):
fusion_bench/utils/lazy_state_dict.py
ADDED

@@ -0,0 +1,268 @@
+import json
+import logging
+import os
+from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple
+
+import torch
+from accelerate.utils.constants import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
+from huggingface_hub import snapshot_download
+from safetensors import safe_open
+from safetensors.torch import load_file
+from transformers import AutoConfig
+
+from fusion_bench.utils.dtype import parse_dtype
+
+if TYPE_CHECKING:
+    from transformers import PretrainedConfig
+
+log = logging.getLogger(__name__)
+
+__all__ = ["resolve_checkpoint_path", "LazyStateDict"]
+
+
+def resolve_checkpoint_path(
+    checkpoint: str,
+    hf_revision: Optional[str] = None,
+    hf_cache_dir: Optional[str] = None,
+    hf_proxies: Optional[Dict] = None,
+):
+    # If it's a local file or directory, return as is
+    if os.path.exists(checkpoint):
+        return checkpoint
+    # If it's a HuggingFace Hub model id, download snapshot
+    try:
+        # This will download the model to the cache and return the local path
+        local_path = snapshot_download(
+            repo_id=checkpoint,
+            revision=hf_revision,
+            cache_dir=hf_cache_dir,
+            proxies=hf_proxies,
+        )
+        return local_path
+    except Exception as e:
+        raise FileNotFoundError(
+            f"Could not resolve checkpoint: {checkpoint}. Error: {e}"
+        )
+
+
+class LazyStateDict:
+    """
+    Dictionary-like object that lazily loads a state dict from a checkpoint path.
+    """
+
+    _local_path: str
+    _state_dict_cache: Optional[Dict]
+    _index_filename: Optional[str]
+    _checkpoint_files: Optional[List[str]]
+    _index: Optional[Dict]
+
+    def __init__(
+        self,
+        checkpoint: str,
+        cache_state_dict: bool = False,
+        torch_dtype: Optional[torch.dtype] = None,
+        device: str = "cpu",
+        hf_revision: Optional[str] = None,
+        hf_cache_dir: Optional[str] = None,
+        hf_proxies: Optional[Dict] = None,
+    ):
+        self._checkpoint = checkpoint
+        self._local_path = resolve_checkpoint_path(
+            checkpoint,
+            hf_revision=hf_revision,
+            hf_cache_dir=hf_cache_dir,
+            hf_proxies=hf_proxies,
+        )
+
+        self._index, self._index_filename, self._checkpoint_files = (
+            self._resolve_checkpoint_files(self._local_path)
+        )
+
+        if cache_state_dict:
+            self._state_dict_cache = {}
+        else:
+            self._state_dict_cache = None
+
+        self._torch_dtype = parse_dtype(torch_dtype)
+        self._device = device
+
+    @property
+    def checkpoint(self) -> str:
+        return self._checkpoint
+
+    @property
+    def config(self) -> "PretrainedConfig":
+        return AutoConfig.from_pretrained(self._checkpoint)
+
+    def state_dict(self) -> "LazyStateDict":
+        return self
+
+    def _resolve_checkpoint_files(self, checkpoint: str):
+        # reference: https://huggingface.co/docs/accelerate/v0.17.1/en/usage_guides/big_modeling
+        checkpoint_files = None
+        index_filename = None
+        if os.path.isfile(checkpoint):
+            if str(checkpoint).endswith(".json"):
+                index_filename = checkpoint
+            else:
+                checkpoint_files = [checkpoint]
+        elif os.path.isdir(checkpoint):
+            # check if the whole state dict is present
+            potential_state_bin = [
+                f for f in os.listdir(checkpoint) if f == WEIGHTS_NAME
+            ]
+            potential_state_safetensor = [
+                f for f in os.listdir(checkpoint) if f == SAFE_WEIGHTS_NAME
+            ]
+            if len(potential_state_bin) == 1:
+                checkpoint_files = [os.path.join(checkpoint, potential_state_bin[0])]
+            elif len(potential_state_safetensor) == 1:
+                checkpoint_files = [
+                    os.path.join(checkpoint, potential_state_safetensor[0])
+                ]
+            else:
+                # otherwise check for sharded checkpoints
+                potential_index = [
+                    f for f in os.listdir(checkpoint) if f.endswith(".index.json")
+                ]
+                if len(potential_index) == 0:
+                    raise ValueError(
+                        f"{checkpoint} is not a folder containing a `.index.json` file or a {WEIGHTS_NAME} or a {SAFE_WEIGHTS_NAME} file"
+                    )
+                elif len(potential_index) == 1:
+                    index_filename = os.path.join(checkpoint, potential_index[0])
+                else:
+                    raise ValueError(
+                        f"{checkpoint} containing more than one `.index.json` file, delete the irrelevant ones."
+                    )
+        else:
+            raise ValueError(
+                "`checkpoint` should be the path to a file containing a whole state dict, or the index of a sharded "
+                f"checkpoint, or a folder containing a sharded checkpoint or the whole state dict, but got {checkpoint}."
+            )
+
+        if index_filename is not None:
+            checkpoint_folder = os.path.split(index_filename)[0]
+            with open(index_filename) as f:
+                index = json.loads(f.read())
+
+            if "weight_map" in index:
+                index = index["weight_map"]
+            checkpoint_files = sorted(list(set(index.values())))
+            checkpoint_files = [
+                os.path.join(checkpoint_folder, f) for f in checkpoint_files
+            ]
+        return index, index_filename, checkpoint_files
+
+    def _load_tensor_from_checkpoint_file(
+        self, checkpoint_file: str, key: str, update_cache: bool = True
+    ) -> torch.Tensor:
+        if checkpoint_file.endswith(".safetensors"):
+            with safe_open(checkpoint_file, framework="pt", device=self._device) as f:
+                tensor = f.get_tensor(key)
+                if self._torch_dtype is not None:
+                    tensor = tensor.to(self._torch_dtype)
+                if update_cache and self._state_dict_cache is not None:
+                    self._state_dict_cache[key] = tensor
+                return tensor
+        else:
+            state_dict = torch.load(checkpoint_file, map_location=self._device)
+            if update_cache:
+                if self._state_dict_cache is not None:
+                    self._state_dict_cache.update(state_dict)
+                else:
+                    log.warning(
+                        f"Load full state dict from file {checkpoint_file}, but state dict cache is disabled."
+                    )
+            return state_dict[key]
+
+    def __getitem__(self, key: str) -> torch.Tensor:
+        if self._state_dict_cache is not None and key in self._state_dict_cache:
+            return self._state_dict_cache[key]
+
+        if self._index is None:
+            if len(self._checkpoint_files) == 1 and os.path.isfile(
+                self._checkpoint_files[0]
+            ):
+                checkpoint_file = self._checkpoint_files[0]
+                tensor = self._load_tensor_from_checkpoint_file(
+                    checkpoint_file, key, update_cache=True
+                )
+                return tensor
+            else:
+                if len(self._checkpoint_files) > 1:
+                    raise RuntimeError(
+                        "Get multiple checkpoint files, but index is not provided."
+                    )
+                if not os.path.isfile(self._checkpoint_files[0]):
+                    raise FileNotFoundError(
+                        f"Checkpoint file {self._checkpoint_files[0]} not found."
+                    )
+                raise RuntimeError("Unexpected error.")
+        else:
+            if key not in self._index:
+                raise KeyError(f"Key {key} not found in index.")
+            checkpoint_file = os.path.join(self._local_path, self._index[key])
+            if not os.path.isfile(checkpoint_file):
+                raise FileNotFoundError(f"Checkpoint file {checkpoint_file} not found.")
+            tensor = self._load_tensor_from_checkpoint_file(
+                checkpoint_file, key, update_cache=True
+            )
+            return tensor
+
+    def __contains__(self, key: str) -> bool:
+        if self._state_dict_cache is not None and key in self._state_dict_cache:
+            return True
+        if self._index is not None and key in self._index:
+            return True
+        if len(self._checkpoint_files) == 1 and os.path.isfile(
+            self._checkpoint_files[0]
+        ):
+            try:
+                tensor = self._load_tensor_from_checkpoint_file(
+                    self._checkpoint_files[0], key, update_cache=False
+                )
+                return tensor is not None
+            except Exception:
+                return False
+        return False
+
+    def __len__(self) -> int:
+        if self._index is not None:
+            return len(self._index)
+        if len(self._checkpoint_files) == 1 and os.path.isfile(
+            self._checkpoint_files[0]
+        ):
+            checkpoint_file = self._checkpoint_files[0]
+            if checkpoint_file.endswith(".safetensors"):
+                with safe_open(checkpoint_file, framework="pt", device="cpu") as f:
+                    return len(tuple(f.keys()))
+            else:
+                return len(
+                    tuple(torch.load(checkpoint_file, map_location="cpu").keys())
+                )
+        raise RuntimeError(
+            "Unexpected error: cannot determine the number of keys in the state dict."
+        )
+
+    def __iter__(self) -> Iterator[str]:
+        if self._index is not None:
+            return iter(self._index)
+        return iter(self._checkpoint_files)
+
+    def keys(self) -> List[str]:
+        return list(self)
+
+    def values(self) -> List[torch.Tensor]:
+        return [self[key] for key in self]
+
+    def items(self) -> Iterator[Tuple[str, torch.Tensor]]:
+        return ((key, self[key]) for key in self)
+
+    def __repr__(self) -> str:
+        if self._index is not None:
+            return f"{self.__class__.__name__}(index={self._index})"
+        else:
+            return (
+                f"{self.__class__.__name__}(checkpoint_files={self._checkpoint_files})"
+            )
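The sharded-checkpoint handling in `_resolve_checkpoint_files` boils down to flattening the index's `weight_map` (tensor name → shard file) into a sorted, de-duplicated list of shard files. A standalone sketch of that step with a made-up index:

```python
# Sketch of the weight_map flattening in _resolve_checkpoint_files
# above; the index content is a made-up example.
import os

def shard_files_from_index(index: dict, checkpoint_folder: str):
    # the on-disk ".index.json" nests the mapping under "weight_map"
    if "weight_map" in index:
        index = index["weight_map"]
    checkpoint_files = sorted(set(index.values()))
    return index, [os.path.join(checkpoint_folder, f) for f in checkpoint_files]

index = {
    "weight_map": {
        "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
        "lm_head.weight": "model-00002-of-00002.safetensors",
        "model.norm.weight": "model-00002-of-00002.safetensors",
    }
}
weight_map, files = shard_files_from_index(index, "ckpt")
print(files)
```

With the index in hand, `__getitem__` only has to open the one shard that holds the requested key, which is what makes the state dict "lazy".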
fusion_bench/utils/parameters.py
CHANGED

@@ -222,6 +222,39 @@ def count_parameters(module: nn.Module, non_zero_only: bool = False) -> tuple[in
     return trainable_params, all_param
 
 
+@torch.no_grad()
+def get_parameter_summary(
+    module_or_state_dict: Union[nn.Module, StateDictType], non_zero_only: bool = False
+) -> dict:
+    """
+    Get a summary of the parameters in a PyTorch model.
+    """
+    if isinstance(module_or_state_dict, nn.Module):
+        state_dict = module_or_state_dict.state_dict(keep_vars=True)
+    else:
+        state_dict = module_or_state_dict
+
+    trainable_params = 0
+    all_param = 0
+    bytes = 0
+
+    for name, param in state_dict.items():
+        # count the number of parameters
+        num_params = _numel(param, non_zero_only)
+        bytes += _numel(param, non_zero_only=False) * param.element_size()
+
+        # accumulate the number of trainable and total parameters
+        all_param += num_params
+        if param.requires_grad:
+            trainable_params += num_params
+
+    return {
+        "trainable_params": trainable_params,
+        "all_param": all_param,
+        "bytes": bytes,
+    }
+
+
 def print_parameters(
     module: nn.Module,
     is_human_readable: bool = True,
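The accounting in `get_parameter_summary` can be checked without torch by substituting a tiny stand-in parameter type: only `numel`, `element_size`, and `requires_grad` are used, and the `non_zero_only` branch is omitted here for brevity. A sketch under those assumptions:

```python
# Stand-in for a tensor parameter; FakeParam is hypothetical.
from dataclasses import dataclass

@dataclass
class FakeParam:
    n: int                 # number of elements
    itemsize: int = 4      # bytes per element (float32)
    requires_grad: bool = True

    def numel(self):
        return self.n

    def element_size(self):
        return self.itemsize

def get_parameter_summary(state_dict: dict) -> dict:
    trainable_params, all_param, nbytes = 0, 0, 0
    for name, param in state_dict.items():
        num_params = param.numel()
        nbytes += param.numel() * param.element_size()
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params
    return {"trainable_params": trainable_params, "all_param": all_param, "bytes": nbytes}

sd = {"weight": FakeParam(10), "frozen": FakeParam(5, requires_grad=False)}
print(get_parameter_summary(sd))
```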
fusion_bench/utils/pylogger.py
CHANGED

@@ -53,3 +53,31 @@ class RankedLogger(logging.LoggerAdapter):
             self.logger.log(level, msg, *args, **kwargs)
         elif current_rank == rank:
             self.logger.log(level, msg, *args, **kwargs)
+
+
+class RankZeroLogger(logging.Logger):
+    """A logger that logs only on rank zero and works just like logging.Logger"""
+
+    @rank_zero_only
+    def _log(self, *args, **kwargs):
+        if "stacklevel" in kwargs:
+            kwargs["stacklevel"] += 1
+        return super()._log(*args, **kwargs)
+
+    def is_global_zero(self):
+        return rank_zero_only.rank == 0
+
+
+RankZeroLogger.manager = logging.Manager(RankZeroLogger.root)
+RankZeroLogger.manager.setLoggerClass(RankZeroLogger)
+
+
+def getRankZeroLogger(name=None):
+    """
+    Return a logger with the specified name, creating it if necessary.
+
+    If no name is specified, return the root logger.
+    """
+    if not name or isinstance(name, str) and name == logging.root.name:
+        return logging.root
+    return RankZeroLogger.manager.getLogger(name)
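The idea behind `RankZeroLogger` is to gate `Logger._log` on the process rank so only rank zero emits records. A minimal standard-library sketch, where the module-level `RANK` stands in for `rank_zero_only.rank`:

```python
# Stdlib sketch of the rank-zero gating above; RANK is a stand-in for
# the distributed rank (set from the environment in real use).
import io
import logging

RANK = 0  # assumed global rank of this process

class RankZeroLogger(logging.Logger):
    def _log(self, *args, **kwargs):
        if RANK != 0:
            return None  # silently drop records on non-zero ranks
        if "stacklevel" in kwargs:
            kwargs["stacklevel"] += 1  # report the caller, not this wrapper
        return super()._log(*args, **kwargs)

stream = io.StringIO()
logger = RankZeroLogger("demo")
logger.addHandler(logging.StreamHandler(stream))
logger.setLevel(logging.INFO)

logger.info("visible on rank 0")
RANK = 1
logger.info("suppressed on other ranks")
print(stream.getvalue())
```

Overriding `_log` (rather than `info`, `warning`, etc.) catches every level in one place, which is why the real class bumps `stacklevel` so `%(filename)s`-style formatting still points at the caller.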
fusion_bench/utils/state_dict_arithmetic.py
CHANGED

@@ -1,12 +1,12 @@
 from collections import OrderedDict
 from numbers import Number
-from typing import Dict, List, Union, cast
+from typing import Callable, Dict, List, Literal, Union, cast
 
 import torch
 from torch import Tensor
 
 from .parameters import check_parameters_all_equal
-from .type import StateDictType
+from .type import BoolStateDictType, StateDictType
 
 
 def to_device(

@@ -295,3 +295,75 @@ def state_dict_weighted_sum(
             device, non_blocking=True
         )
     return weighted_sum_state_dict
+
+
+def state_dict_diff_abs(a: StateDictType, b: StateDictType):
+    """
+    Returns the per-layer abs of the difference between two state dicts.
+
+    Args:
+        a (StateDictType): The first state dict.
+        b (StateDictType): The second state dict.
+
+    Returns:
+        StateDictType: The absolute difference between the two state dicts.
+    """
+    diff = state_dict_sub(a, b)
+    abs_diff = {key: diff[key].abs() for key in diff}
+    return abs_diff
+
+
+def state_dict_binary_mask(
+    a: StateDictType,
+    b: StateDictType,
+    compare_fn: Union[
+        Literal["greater", "less", "equal", "not_equal"],
+        Callable[[Tensor, Tensor], torch.BoolTensor],
+    ] = "greater",
+) -> BoolStateDictType:
+    """
+    Returns the binary mask of elements in a compared to elements in b using the provided comparison function.
+
+    Args:
+        a (StateDictType): The first state dict.
+        b (StateDictType): The second state dict.
+        compare_fn (Union[Literal["greater", "less", "equal", "not_equal"], Callable[[Tensor, Tensor], Tensor]]): A function that takes two tensors and returns a boolean tensor.
+            Defaults to greater than comparison (x > y).
+
+    Returns:
+        StateDictType: A dictionary containing binary masks (0 or 1) based on the comparison.
+    """
+    compare_fn_dict = {
+        "greater": lambda x, y: x > y,
+        "less": lambda x, y: x < y,
+        "equal": lambda x, y: x == y,
+        "not_equal": lambda x, y: x != y,
+    }
+    if isinstance(compare_fn, str):
+        compare_fn = compare_fn_dict[compare_fn]
+    elif not callable(compare_fn):
+        raise ValueError(
+            f"compare_fn must be a string or a callable, but got {type(compare_fn)}"
+        )
+
+    mask = OrderedDict()
+    for key in a:
+        mask[key] = compare_fn(a[key], b[key])
+    return mask
+
+
+def state_dict_hadmard_product(a: StateDictType, b: StateDictType) -> StateDictType:
+    """
+    Returns the Hadamard product of two state dicts, i.e. element-wise product.
+
+    Args:
+        a (StateDictType): The first state dict.
+        b (StateDictType): The second state dict.
+
+    Returns:
+        StateDictType: The Hadamard product of the two state dicts.
+    """
+    ans = OrderedDict()
+    for key in a:
+        ans[key] = a[key] * b[key]
+    return ans
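The three new helpers compose naturally: threshold the absolute difference of two state dicts into a binary mask, then apply the mask with the Hadamard product. A sketch over dicts of plain numbers instead of tensor state dicts (`state_dict_hadmard_product` keeps the code's spelling; the threshold values are illustrative):

```python
# Scalar sketch of the new state-dict helpers and how they compose.
from collections import OrderedDict

def state_dict_sub(a, b):
    return OrderedDict((k, a[k] - b[k]) for k in a)

def state_dict_diff_abs(a, b):
    diff = state_dict_sub(a, b)
    return {k: abs(v) for k, v in diff.items()}

def state_dict_binary_mask(a, b, compare_fn="greater"):
    fns = {
        "greater": lambda x, y: x > y,
        "less": lambda x, y: x < y,
        "equal": lambda x, y: x == y,
        "not_equal": lambda x, y: x != y,
    }
    fn = fns[compare_fn] if isinstance(compare_fn, str) else compare_fn
    return OrderedDict((k, fn(a[k], b[k])) for k in a)

def state_dict_hadmard_product(a, b):  # name as in the diff
    return OrderedDict((k, a[k] * b[k]) for k in a)

a = {"w": 3.0, "b": -1.0}
b = {"w": 1.0, "b": 2.0}
threshold = {"w": 2.5, "b": 1.5}          # per-key threshold, illustrative
mask = state_dict_binary_mask(state_dict_diff_abs(a, b), threshold)
masked = state_dict_hadmard_product(mask, state_dict_sub(a, b))
print(mask, masked)
```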
fusion_bench/utils/type.py
CHANGED

{fusion_bench-0.2.15.dist-info → fusion_bench-0.2.17.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.
+Version: 0.2.17
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License

@@ -70,7 +70,7 @@ Dynamic: license-file
 
 FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
 
-Projects based on FusionBench and news from the community (descending order of date):
+Projects based on FusionBench and news from the community (descending order of date. If you have any work based on FusionBench, please feel free to let us know, we are willing to add it to the list. :partying_face:):
 
 <details>
 <summary>Hao Mark Chen, et al. FW-Merging: Scaling Model Merging with Frank-Wolfe Optimization. Mar 2025. https://arxiv.org/abs/2503.12649</summary>

@@ -139,6 +139,10 @@ cd fusion_bench
 pip install -e . # install the package in editable mode
 ```
 
+> [!TIP]
+> FusionBench is highly dependent on the use of [Hydra](https://hydra.cc/) for configuration management and command line argument parsing, and [Lightning Fabric](https://lightning.ai/) for device management.
+> If you are not familiar with these tools, it is strongly recommended to read the [Hydra](https://hydra.cc/docs/intro/) and [Lightning Fabric](https://lightning.ai/docs/fabric/stable/) documentation.
+
 ### Install with [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
 
 [](https://doi.org/10.5281/zenodo.10256836)

@@ -167,6 +171,8 @@ It can be used to improve the performance and robustness of model or to combine
 For a more detailed introduction to deep model fusion, you can refer to [W. Li, 2023, 'Deep Model Fusion: A Survey'](https://arxiv.org/abs/2309.15698). We also provide a brief overview of deep model fusion in [our documentation](https://tanganke.github.io/fusion_bench/).
 In this benchmark, we evaluate the performance of different fusion methods on a variety of datasets and tasks.
 
+A comprehensive list of papers about model merging can be found at [this repository](https://github.com/EnnengYang/Awesome-Model-Merging-Methods-Theories-Applications), and [the arXiv paper](https://arxiv.org/abs/2408.07666) is also available.
+
 ## Project Structure
 
 The project is structured as follows: