fusion-bench 0.2.15-py3-none-any.whl → 0.2.16-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. fusion_bench/method/__init__.py +4 -0
  2. fusion_bench/method/fw_merging/__init__.py +2 -0
  3. fusion_bench/method/fw_merging/fw_hard.py +448 -0
  4. fusion_bench/method/fw_merging/fw_soft.py +519 -0
  5. fusion_bench/method/fw_merging/utils.py +331 -0
  6. fusion_bench/method/moe_pruner/__init__.py +7 -0
  7. fusion_bench/method/moe_pruner/hooks/__init__.py +6 -0
  8. fusion_bench/method/moe_pruner/hooks/deepseek_v2.py +85 -0
  9. fusion_bench/method/moe_pruner/hooks/hook.py +23 -0
  10. fusion_bench/method/moe_pruner/hooks/mixtral.py +93 -0
  11. fusion_bench/method/moe_pruner/moe_pruner.py +304 -0
  12. fusion_bench/method/moe_pruner/utils/__init__.py +1 -0
  13. fusion_bench/method/moe_pruner/utils/data.py +154 -0
  14. fusion_bench/method/moe_pruner/utils/layerwrapper.py +61 -0
  15. fusion_bench/method/moe_pruner/utils/prune.py +313 -0
  16. fusion_bench/method/moe_pruner/utils/score.py +41 -0
  17. fusion_bench/method/pruning/__init__.py +1 -0
  18. fusion_bench/method/pruning/llama_sparsegpt_prune.py +223 -0
  19. fusion_bench/method/pruning/sparsegpt_utils/__init__.py +1 -0
  20. fusion_bench/method/pruning/sparsegpt_utils/sparsegpt.py +128 -0
  21. fusion_bench/method/pruning/wanda_utils/data.py +33 -14
  22. fusion_bench/method/randes/__init__.py +15 -0
  23. fusion_bench/method/randes/base_algorithm.py +1013 -0
  24. fusion_bench/method/randes/modelsoup.py +126 -0
  25. fusion_bench/method/randes/task_arithmetic.py +318 -0
  26. fusion_bench/method/sparselo/sparselo.py +20 -2
  27. fusion_bench/method/tall_mask/__init__.py +1 -0
  28. fusion_bench/method/tall_mask/task_arithmetic.py +133 -0
  29. fusion_bench/modelpool/lazy_state_dict_pool.py +15 -0
  30. fusion_bench/models/modeling_deepseek_v2/__init__.py +15 -0
  31. fusion_bench/models/modeling_deepseek_v2/configuration_deepseek.py +208 -0
  32. fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +1922 -0
  33. fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +38 -0
  34. fusion_bench/programs/fabric_fusion_program.py +5 -0
  35. fusion_bench/taskpool/clip_vision/taskpool.py +8 -1
  36. fusion_bench/utils/__init__.py +1 -0
  37. fusion_bench/utils/data.py +1 -1
  38. fusion_bench/utils/lazy_state_dict.py +268 -0
  39. fusion_bench/utils/parameters.py +33 -0
  40. fusion_bench/utils/state_dict_arithmetic.py +74 -2
  41. fusion_bench/utils/type.py +1 -0
  42. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/METADATA +6 -2
  43. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/RECORD +77 -21
  44. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/WHEEL +1 -1
  45. fusion_bench_config/dataset/image_classification/test/TALL10.yaml +28 -0
  46. fusion_bench_config/dataset/image_classification/test/TALL12.yaml +28 -0
  47. fusion_bench_config/dataset/image_classification/test/TALL16.yaml +28 -0
  48. fusion_bench_config/dataset/image_classification/test/TALL18.yaml +28 -0
  49. fusion_bench_config/dataset/image_classification/train/TALL10.yaml +28 -0
  50. fusion_bench_config/dataset/image_classification/train/TALL12.yaml +28 -0
  51. fusion_bench_config/dataset/image_classification/train/TALL16.yaml +28 -0
  52. fusion_bench_config/dataset/image_classification/train/TALL18.yaml +28 -0
  53. fusion_bench_config/method/fw_merging/fw_hard.yaml +11 -0
  54. fusion_bench_config/method/fw_merging/fw_soft.yaml +12 -0
  55. fusion_bench_config/method/moe_pruner/moe_pruner.yaml +15 -0
  56. fusion_bench_config/method/pruning/llama_sparsegpt_pruning.yaml +16 -0
  57. fusion_bench_config/method/randes/superposed_model_soup.yaml +18 -0
  58. fusion_bench_config/method/randes/superposed_task_arithmetic.yaml +20 -0
  59. fusion_bench_config/method/randes/superposed_task_arithmetic_lora.yaml +20 -0
  60. fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +2 -1
  61. fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
  62. fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
  63. fusion_bench_config/method/tall_mask/task_arithmetic.yaml +4 -0
  64. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL10.yaml +29 -0
  65. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL12.yaml +29 -0
  66. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL16.yaml +29 -0
  67. fusion_bench_config/model/clip-vit/clip-vit-base-patch32_TALL18.yaml +29 -0
  68. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +8 -0
  69. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +8 -0
  70. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +8 -0
  71. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +8 -0
  72. fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml +15 -0
  73. fusion_bench_config/modelpool/CausalLMPool/mixtral-8x7b.yaml +14 -0
  74. fusion_bench_config/modelpool/SeqenceClassificationModelPool/roberta-base_glue.yaml +69 -0
  75. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/entry_points.txt +0 -0
  76. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/licenses/LICENSE +0 -0
  77. {fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/top_level.txt +0 -0
fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py
@@ -0,0 +1,38 @@
+from typing import List, Optional, Union
+
+
+from transformers.models.llama import LlamaTokenizerFast
+
+
+class DeepseekTokenizerFast(LlamaTokenizerFast):
+
+    def convert_ids_to_tokens(
+        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
+    ) -> Union[str, List[str]]:
+        """
+        Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and
+        added tokens.
+
+        Args:
+            ids (`int` or `List[int]`):
+                The token id (or token ids) to convert to tokens.
+            skip_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not to remove special tokens in the decoding.
+
+        Returns:
+            `str` or `List[str]`: The decoded token(s).
+        """
+        if isinstance(ids, int):
+            return self._convert_id_to_token(ids)
+        tokens = []
+        for index in ids:
+            index = int(index)
+            if skip_special_tokens and index in self.all_special_ids:
+                continue
+            token = self._tokenizer.id_to_token(index)
+            tokens.append(token if token is not None else "")
+        return tokens
+
+    def _convert_id_to_token(self, index: int) -> Optional[str]:
+        token = self._tokenizer.id_to_token(int(index))
+        return token if token is not None else ""
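Note: the practical effect of this override is that unknown token ids decode to an empty string rather than None, so joined output never trips over None entries. A minimal usage sketch, assuming a DeepSeek-V2 checkpoint is reachable (the model id and the out-of-range id below are illustrative):

from fusion_bench.models.modeling_deepseek_v2.tokenization_deepseek_fast import (
    DeepseekTokenizerFast,
)

# Illustrative checkpoint id; any DeepSeek-V2 tokenizer.json works the same way.
tokenizer = DeepseekTokenizerFast.from_pretrained("deepseek-ai/DeepSeek-V2-Lite")
tokens = tokenizer.convert_ids_to_tokens([0, 1, 10**9])  # 10**9: id outside the vocab
assert all(t is not None for t in tokens)  # unknown ids become "" instead of None
text = "".join(tokens)  # safe to join without None checks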
fusion_bench/programs/fabric_fusion_program.py
@@ -196,6 +196,11 @@ class FabricModelFusionProgram(
         for key, item in merged_model.items():
             if isinstance(item, nn.Module):
                 report[key] = taskpool.evaluate(item, *args, **kwargs)
+            elif key == "models":
+                # for multi-model evaluation
+                report[key] = self.evaluate_merged_model(
+                    taskpool, item, *args, **kwargs
+                )
             else:
                 # metadata
                 report[key] = item
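Note: this new branch lets a fusion method return several models at once under a "models" key; evaluation recurses into that sub-dict while the remaining keys pass through as metadata. A self-contained sketch of the dispatch logic with a dummy taskpool (the classes below are illustrative, not the fusion_bench API):

import torch.nn as nn

class DummyTaskPool:
    def evaluate(self, model):
        return {"num_params": sum(p.numel() for p in model.parameters())}

def evaluate_merged(taskpool, merged):
    report = {}
    for key, item in merged.items():
        if isinstance(item, nn.Module):
            report[key] = taskpool.evaluate(item)
        elif key == "models":  # new in 0.2.16: recurse into the sub-dict
            report[key] = evaluate_merged(taskpool, item)
        else:
            report[key] = item  # metadata is copied through unchanged
    return report

merged = {"models": {"soup": nn.Linear(4, 4), "ties": nn.Linear(4, 4)}, "alpha": 0.3}
print(evaluate_merged(DummyTaskPool(), merged))
# -> {'models': {'soup': {'num_params': 20}, 'ties': {'num_params': 20}}, 'alpha': 0.3}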
fusion_bench/taskpool/clip_vision/taskpool.py
@@ -348,8 +348,15 @@ class CLIPVisionModelTaskPool(
 
         log.info(f"Evaluation Result: {report}")
         if self.fabric.is_global_zero and len(self.fabric._loggers) > 0:
-            with open(os.path.join(self.log_dir, "report.json"), "w") as fp:
+            save_path = os.path.join(self.log_dir, "report.json")
+            for version in itertools.count(1):
+                if not os.path.exists(save_path):
+                    break
+                # if the file already exists, increment the version to avoid overwriting
+                save_path = os.path.join(self.log_dir, f"report_{version}.json")
+            with open(save_path, "w") as fp:
                 json.dump(report, fp)
+            log.info(f"Evaluation report saved to {save_path}")
         return report
 
     def on_task_evaluation_begin(self, classifier: HFCLIPClassifier, task_name: str):
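Note: the loop above probes report.json, then report_1.json, report_2.json, and so on, until it finds an unused name, so repeated runs in the same log directory no longer overwrite earlier reports. The same pattern in isolation, as a standalone helper:

import itertools
import os

def next_free_report_path(log_dir: str) -> str:
    # Same pattern as the taskpool code above: bump the version suffix until
    # the candidate name is unused, so earlier reports are never clobbered.
    save_path = os.path.join(log_dir, "report.json")
    for version in itertools.count(1):
        if not os.path.exists(save_path):
            break
        save_path = os.path.join(log_dir, f"report_{version}.json")
    return save_path

# report.json -> report_1.json -> report_2.json -> ... on successive runs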
fusion_bench/utils/__init__.py
@@ -12,3 +12,4 @@ from .misc import *
 from .packages import import_object
 from .parameters import *
 from .timer import timeit_context
+from .lazy_state_dict import LazyStateDict
fusion_bench/utils/data.py
@@ -96,7 +96,7 @@ def train_validation_split(
 
     # Compute the number of samples for training and validation
     num_samples = len(dataset)
-    if validation_size is not None:
+    if validation_size is None:
         assert (
             0 < validation_fraction < 1
         ), "Validation fraction must be between 0 and 1"
fusion_bench/utils/lazy_state_dict.py
@@ -0,0 +1,268 @@
+import json
+import logging
+import os
+from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple
+
+import torch
+from accelerate.utils.constants import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
+from huggingface_hub import snapshot_download
+from safetensors import safe_open
+from safetensors.torch import load_file
+from transformers import AutoConfig
+
+from fusion_bench.utils.dtype import parse_dtype
+
+if TYPE_CHECKING:
+    from transformers import PretrainedConfig
+
+log = logging.getLogger(__name__)
+
+__all__ = ["resolve_checkpoint_path", "LazyStateDict"]
+
+
+def resolve_checkpoint_path(
+    checkpoint: str,
+    hf_revision: Optional[str] = None,
+    hf_cache_dir: Optional[str] = None,
+    hf_proxies: Optional[Dict] = None,
+):
+    # If it's a local file or directory, return as is
+    if os.path.exists(checkpoint):
+        return checkpoint
+    # If it's a HuggingFace Hub model id, download snapshot
+    try:
+        # This will download the model to the cache and return the local path
+        local_path = snapshot_download(
+            repo_id=checkpoint,
+            revision=hf_revision,
+            cache_dir=hf_cache_dir,
+            proxies=hf_proxies,
+        )
+        return local_path
+    except Exception as e:
+        raise FileNotFoundError(
+            f"Could not resolve checkpoint: {checkpoint}. Error: {e}"
+        )
+
+
+class LazyStateDict:
+    """
+    Dictionary-like object that lazily loads a state dict from a checkpoint path.
+    """
+
+    _local_path: str
+    _state_dict_cache: Optional[Dict]
+    _index_filename: Optional[str]
+    _checkpoint_files: Optional[List[str]]
+    _index: Optional[Dict]
+
+    def __init__(
+        self,
+        checkpoint: str,
+        cache_state_dict: bool = False,
+        torch_dtype: Optional[torch.dtype] = None,
+        device: str = "cpu",
+        hf_revision: Optional[str] = None,
+        hf_cache_dir: Optional[str] = None,
+        hf_proxies: Optional[Dict] = None,
+    ):
+        self._checkpoint = checkpoint
+        self._local_path = resolve_checkpoint_path(
+            checkpoint,
+            hf_revision=hf_revision,
+            hf_cache_dir=hf_cache_dir,
+            hf_proxies=hf_proxies,
+        )
+
+        self._index, self._index_filename, self._checkpoint_files = (
+            self._resolve_checkpoint_files(self._local_path)
+        )
+
+        if cache_state_dict:
+            self._state_dict_cache = {}
+        else:
+            self._state_dict_cache = None
+
+        self._torch_dtype = parse_dtype(torch_dtype)
+        self._device = device
+
+    @property
+    def checkpoint(self) -> str:
+        return self._checkpoint
+
+    @property
+    def config(self) -> "PretrainedConfig":
+        return AutoConfig.from_pretrained(self._checkpoint)
+
+    def state_dict(self) -> "LazyStateDict":
+        return self
+
+    def _resolve_checkpoint_files(self, checkpoint: str):
+        # reference: https://huggingface.co/docs/accelerate/v0.17.1/en/usage_guides/big_modeling
+        checkpoint_files = None
+        index_filename = None
+        if os.path.isfile(checkpoint):
+            if str(checkpoint).endswith(".json"):
+                index_filename = checkpoint
+            else:
+                checkpoint_files = [checkpoint]
+        elif os.path.isdir(checkpoint):
+            # check if the whole state dict is present
+            potential_state_bin = [
+                f for f in os.listdir(checkpoint) if f == WEIGHTS_NAME
+            ]
+            potential_state_safetensor = [
+                f for f in os.listdir(checkpoint) if f == SAFE_WEIGHTS_NAME
+            ]
+            if len(potential_state_bin) == 1:
+                checkpoint_files = [os.path.join(checkpoint, potential_state_bin[0])]
+            elif len(potential_state_safetensor) == 1:
+                checkpoint_files = [
+                    os.path.join(checkpoint, potential_state_safetensor[0])
+                ]
+            else:
+                # otherwise check for sharded checkpoints
+                potential_index = [
+                    f for f in os.listdir(checkpoint) if f.endswith(".index.json")
+                ]
+                if len(potential_index) == 0:
+                    raise ValueError(
+                        f"{checkpoint} is not a folder containing a `.index.json` file or a {WEIGHTS_NAME} or a {SAFE_WEIGHTS_NAME} file"
+                    )
+                elif len(potential_index) == 1:
+                    index_filename = os.path.join(checkpoint, potential_index[0])
+                else:
+                    raise ValueError(
+                        f"{checkpoint} containing more than one `.index.json` file, delete the irrelevant ones."
+                    )
+        else:
+            raise ValueError(
+                "`checkpoint` should be the path to a file containing a whole state dict, or the index of a sharded "
+                f"checkpoint, or a folder containing a sharded checkpoint or the whole state dict, but got {checkpoint}."
+            )
+
+        if index_filename is not None:
+            checkpoint_folder = os.path.split(index_filename)[0]
+            with open(index_filename) as f:
+                index = json.loads(f.read())
+
+            if "weight_map" in index:
+                index = index["weight_map"]
+            checkpoint_files = sorted(list(set(index.values())))
+            checkpoint_files = [
+                os.path.join(checkpoint_folder, f) for f in checkpoint_files
+            ]
+        return index, index_filename, checkpoint_files
+
+    def _load_tensor_from_checkpoint_file(
+        self, checkpoint_file: str, key: str, update_cache: bool = True
+    ) -> torch.Tensor:
+        if checkpoint_file.endswith(".safetensors"):
+            with safe_open(checkpoint_file, framework="pt", device=self._device) as f:
+                tensor = f.get_tensor(key)
+                if self._torch_dtype is not None:
+                    tensor = tensor.to(self._torch_dtype)
+                if update_cache and self._state_dict_cache is not None:
+                    self._state_dict_cache[key] = tensor
+                return tensor
+        else:
+            state_dict = torch.load(checkpoint_file, map_location=self._device)
+            if update_cache:
+                if self._state_dict_cache is not None:
+                    self._state_dict_cache.update(state_dict)
+                else:
+                    log.warning(
+                        f"Load full state dict from file {checkpoint_file}, but state dict cache is disabled."
+                    )
+            return state_dict[key]
+
+    def __getitem__(self, key: str) -> torch.Tensor:
+        if self._state_dict_cache is not None and key in self._state_dict_cache:
+            return self._state_dict_cache[key]
+
+        if self._index is None:
+            if len(self._checkpoint_files) == 1 and os.path.isfile(
+                self._checkpoint_files[0]
+            ):
+                checkpoint_file = self._checkpoint_files[0]
+                tensor = self._load_tensor_from_checkpoint_file(
+                    checkpoint_file, key, update_cache=True
+                )
+                return tensor
+            else:
+                if len(self._checkpoint_files) > 1:
+                    raise RuntimeError(
+                        "Get multiple checkpoint files, but index is not provided."
+                    )
+                if not os.path.isfile(self._checkpoint_files[0]):
+                    raise FileNotFoundError(
+                        f"Checkpoint file {self._checkpoint_files[0]} not found."
+                    )
+                raise RuntimeError("Unexpected error.")
+        else:
+            if key not in self._index:
+                raise KeyError(f"Key {key} not found in index.")
+            checkpoint_file = os.path.join(self._local_path, self._index[key])
+            if not os.path.isfile(checkpoint_file):
+                raise FileNotFoundError(f"Checkpoint file {checkpoint_file} not found.")
+            tensor = self._load_tensor_from_checkpoint_file(
+                checkpoint_file, key, update_cache=True
+            )
+            return tensor
+
+    def __contains__(self, key: str) -> bool:
+        if self._state_dict_cache is not None and key in self._state_dict_cache:
+            return True
+        if self._index is not None and key in self._index:
+            return True
+        if len(self._checkpoint_files) == 1 and os.path.isfile(
+            self._checkpoint_files[0]
+        ):
+            try:
+                tensor = self._load_tensor_from_checkpoint_file(
+                    self._checkpoint_files[0], key, update_cache=False
+                )
+                return tensor is not None
+            except Exception:
+                return False
+        return False
+
+    def __len__(self) -> int:
+        if self._index is not None:
+            return len(self._index)
+        if len(self._checkpoint_files) == 1 and os.path.isfile(
+            self._checkpoint_files[0]
+        ):
+            checkpoint_file = self._checkpoint_files[0]
+            if checkpoint_file.endswith(".safetensors"):
+                with safe_open(checkpoint_file, framework="pt", device="cpu") as f:
+                    return len(tuple(f.keys()))
+            else:
+                return len(
+                    tuple(torch.load(checkpoint_file, map_location="cpu").keys())
+                )
+        raise RuntimeError(
+            "Unexpected error: cannot determine the number of keys in the state dict."
+        )
+
+    def __iter__(self) -> Iterator[str]:
+        if self._index is not None:
+            return iter(self._index)
+        return iter(self._checkpoint_files)
+
+    def keys(self) -> List[str]:
+        return list(self)
+
+    def values(self) -> List[torch.Tensor]:
+        return [self[key] for key in self]
+
+    def items(self) -> Iterator[Tuple[str, torch.Tensor]]:
+        return ((key, self[key]) for key in self)
+
+    def __repr__(self) -> str:
+        if self._index is not None:
+            return f"{self.__class__.__name__}(index={self._index})"
+        else:
+            return (
+                f"{self.__class__.__name__}(checkpoint_files={self._checkpoint_files})"
+            )
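Note: LazyStateDict resolves the checkpoint once (a local path or a Hugging Face Hub id), reads the shard index if one exists, and only loads individual tensors when they are accessed, via safetensors where possible, instead of materializing the whole state dict. A hedged usage sketch; the model id and tensor key are illustrative, not prescribed by the library:

import torch
from fusion_bench.utils.lazy_state_dict import LazyStateDict

# Illustrative checkpoint id; any local directory holding model.safetensors
# or a sharded *.index.json works the same way.
sd = LazyStateDict(
    "openai/clip-vit-base-patch32",
    cache_state_dict=True,      # keep tensors cached after first access
    torch_dtype=torch.float32,
)
print(len(sd), "tensors in the checkpoint")
# Only this tensor is read from disk; the rest stay untouched.
w = sd["vision_model.embeddings.patch_embedding.weight"]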
fusion_bench/utils/parameters.py
@@ -222,6 +222,39 @@ def count_parameters(module: nn.Module, non_zero_only: bool = False) -> tuple[in
     return trainable_params, all_param
 
 
+@torch.no_grad()
+def get_parameter_summary(
+    module_or_state_dict: Union[nn.Module, StateDictType], non_zero_only: bool = False
+) -> dict:
+    """
+    Get a summary of the parameters in a PyTorch model.
+    """
+    if isinstance(module_or_state_dict, nn.Module):
+        state_dict = module_or_state_dict.state_dict(keep_vars=True)
+    else:
+        state_dict = module_or_state_dict
+
+    trainable_params = 0
+    all_param = 0
+    bytes = 0
+
+    for name, param in state_dict.items():
+        # count the number of parameters
+        num_params = _numel(param, non_zero_only)
+        bytes += _numel(param, non_zero_only=False) * param.element_size()
+
+        # accumulate the number of trainable and total parameters
+        all_param += num_params
+        if param.requires_grad:
+            trainable_params += num_params
+
+    return {
+        "trainable_params": trainable_params,
+        "all_param": all_param,
+        "bytes": bytes,
+    }
+
+
 def print_parameters(
     module: nn.Module,
     is_human_readable: bool = True,
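Note: get_parameter_summary counts parameters with the module's internal _numel helper; the bytes figure always counts every element (non_zero_only affects only the parameter counts) times the per-element size. A quick sanity check on a small module, assuming the function is importable from fusion_bench.utils.parameters:

import torch.nn as nn
from fusion_bench.utils.parameters import get_parameter_summary

layer = nn.Linear(10, 5)  # 50 weights + 5 biases = 55 float32 parameters
print(get_parameter_summary(layer))
# expected: {'trainable_params': 55, 'all_param': 55, 'bytes': 220}  (55 * 4 bytes)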
fusion_bench/utils/state_dict_arithmetic.py
@@ -1,12 +1,12 @@
 from collections import OrderedDict
 from numbers import Number
-from typing import Dict, List, Union, cast
+from typing import Callable, Dict, List, Literal, Union, cast
 
 import torch
 from torch import Tensor
 
 from .parameters import check_parameters_all_equal
-from .type import StateDictType
+from .type import BoolStateDictType, StateDictType
 
 
 def to_device(
@@ -295,3 +295,75 @@ def state_dict_weighted_sum(
             device, non_blocking=True
         )
     return weighted_sum_state_dict
+
+
+def state_dict_diff_abs(a: StateDictType, b: StateDictType):
+    """
+    Returns the per-layer abs of the difference between two state dicts.
+
+    Args:
+        a (StateDictType): The first state dict.
+        b (StateDictType): The second state dict.
+
+    Returns:
+        StateDictType: The absolute difference between the two state dicts.
+    """
+    diff = state_dict_sub(a, b)
+    abs_diff = {key: diff[key].abs() for key in diff}
+    return abs_diff
+
+
+def state_dict_binary_mask(
+    a: StateDictType,
+    b: StateDictType,
+    compare_fn: Union[
+        Literal["greater", "less", "equal", "not_equal"],
+        Callable[[Tensor, Tensor], torch.BoolTensor],
+    ] = "greater",
+) -> BoolStateDictType:
+    """
+    Returns the binary mask of elements in a compared to elements in b using the provided comparison function.
+
+    Args:
+        a (StateDictType): The first state dict.
+        b (StateDictType): The second state dict.
+        compare_fn (Union[Literal["greater", "less", "equal", "not_equal"], Callable[[Tensor, Tensor], Tensor]]): A function that takes two tensors and returns a boolean tensor.
+            Defaults to greater than comparison (x > y).
+
+    Returns:
+        StateDictType: A dictionary containing binary masks (0 or 1) based on the comparison.
+    """
+    compare_fn_dict = {
+        "greater": lambda x, y: x > y,
+        "less": lambda x, y: x < y,
+        "equal": lambda x, y: x == y,
+        "not_equal": lambda x, y: x != y,
+    }
+    if isinstance(compare_fn, str):
+        compare_fn = compare_fn_dict[compare_fn]
+    elif not callable(compare_fn):
+        raise ValueError(
+            f"compare_fn must be a string or a callable, but got {type(compare_fn)}"
+        )
+
+    mask = OrderedDict()
+    for key in a:
+        mask[key] = compare_fn(a[key], b[key])
+    return mask
+
+
+def state_dict_hadmard_product(a: StateDictType, b: StateDictType) -> StateDictType:
+    """
+    Returns the Hadamard product of two state dicts, i.e. element-wise product.
+
+    Args:
+        a (StateDictType): The first state dict.
+        b (StateDictType): The second state dict.
+
+    Returns:
+        StateDictType: The Hadamard product of the two state dicts.
+    """
+    ans = OrderedDict()
+    for key in a:
+        ans[key] = a[key] * b[key]
+    return ans
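Note: these three helpers compose naturally for mask-based merging, in the spirit of the TALL-mask task arithmetic added in this release (whether that method calls exactly these functions is not shown in this diff). The pattern: take per-parameter absolute differences, threshold them into a boolean mask, then gate a task vector elementwise. A toy end-to-end sketch:

import torch
from fusion_bench.utils.state_dict_arithmetic import (
    state_dict_binary_mask,
    state_dict_diff_abs,
    state_dict_hadmard_product,
)

base = {"w": torch.tensor([1.0, 2.0, 3.0])}
finetuned = {"w": torch.tensor([1.0, 2.5, 0.0])}
task_vector = {"w": finetuned["w"] - base["w"]}     # [0.0, 0.5, -3.0]

delta = state_dict_diff_abs(finetuned, base)        # {"w": [0.0, 0.5, 3.0]}
threshold = {"w": torch.full_like(delta["w"], 0.4)}
mask = state_dict_binary_mask(delta, threshold)     # |delta| > 0.4 -> [F, T, T]
kept = state_dict_hadmard_product(task_vector, {"w": mask["w"].float()})
print(kept["w"])  # tensor([ 0.0000,  0.5000, -3.0000])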
fusion_bench/utils/type.py
@@ -9,6 +9,7 @@ try:
     from torch import Tensor, nn
 
     StateDictType: TypeAlias = Dict[str, Tensor]
+    BoolStateDictType: TypeAlias = Dict[str, torch.BoolTensor]
     TorchModelType = TypeVar("TorchModelType", bound=nn.Module)
 
 except ImportError:
{fusion_bench-0.2.15.dist-info → fusion_bench-0.2.16.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.15
+Version: 0.2.16
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -70,7 +70,7 @@ Dynamic: license-file
 
 FusionBench is a benchmark suite designed to evaluate the performance of various deep model fusion techniques. It aims to provide a comprehensive comparison of different methods on a variety of datasets and tasks.
 
-Projects based on FusionBench and news from the community (descending order of date):
+Projects based on FusionBench and news from the community (descending order of date. If you have any work based on FusionBench, please feel free to let us know, we are willing to add it to the list. :partying_face:):
 
 <details>
 <summary>Hao Mark Chen, et al. FW-Merging: Scaling Model Merging with Frank-Wolfe Optimization. Mar 2025. https://arxiv.org/abs/2503.12649</summary>
@@ -139,6 +139,10 @@ cd fusion_bench
 pip install -e . # install the package in editable mode
 ```
 
+> [!TIP]
+> FusionBench is highly dependent on the use of [Hydra](https://hydra.cc/) for configuration management and command line argument parsing, and [Lightning Fabric](https://lightning.ai/) for device management.
+> If you are not familiar with these tools, it is strongly recommended to read the [Hydra](https://hydra.cc/docs/intro/) and [Lightning Fabric](https://lightning.ai/docs/fabric/stable/) documentation.
+
 ### Install with [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
 
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10256836.svg)](https://doi.org/10.5281/zenodo.10256836)