PyPI - fusion-bench - Versions diffs - 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl - Mend

fusion-bench 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

fusion_bench/utils/__init__.py CHANGED Viewed

@@ -2,14 +2,15 @@
 import importlib
 from typing import Iterable
-from . import data, functools, path
+from . import data, functools, path, pylogger
 from .cache_utils import *
 from .devices import *
 from .dtype import parse_dtype
 from .fabric import seed_everything_by_time
 from .instantiate_utils import instantiate, is_instantiable
+from .json import load_from_json, save_to_json
+from .lazy_state_dict import LazyStateDict
 from .misc import *
 from .packages import import_object
 from .parameters import *
 from .timer import timeit_context
-from .lazy_state_dict import LazyStateDict

fusion_bench/utils/dtype.py CHANGED Viewed

@@ -13,6 +13,7 @@ from transformers.utils import (
 PRECISION_STR_TO_DTYPE: Dict[str, torch.dtype] = {
     "fp16": torch.float16,
     "float16": torch.float16,
+    "half": torch.float16,
     "bf16": torch.bfloat16,
     "bfloat16": torch.bfloat16,
     "float": torch.float32,
@@ -50,7 +51,7 @@ def parse_dtype(dtype: Optional[str]):
     dtype = dtype.strip('"')
     if dtype not in PRECISION_STR_TO_DTYPE:
-        raise ValueError(f"Unsupported dtype: {type(dtype)}")
+        raise ValueError(f"Unsupported dtype string: {dtype}")
     dtype = PRECISION_STR_TO_DTYPE[dtype]
     return dtype

fusion_bench/utils/fabric.py CHANGED Viewed

@@ -1,17 +1,24 @@
 import time
+from typing import Optional
 import lightning as L
+from fusion_bench.utils.pylogger import getRankZeroLogger
-def seed_everything_by_time(fabric: L.Fabric):
+log = getRankZeroLogger(__name__)
+def seed_everything_by_time(fabric: Optional[L.Fabric] = None):
     """
     Set seed for all processes by time.
     """
     # set seed for all processes
-    if fabric.is_global_zero:
+    if fabric is None or fabric.is_global_zero:
         seed = int(time.time())
     else:
         seed = None
-    fabric.barrier()
-    seed = fabric.broadcast(seed, src=0)
+    if fabric is not None:
+        log.debug(f"Broadcasting seed `{seed}` to all processes")
+        fabric.barrier()
+        seed = fabric.broadcast(seed, src=0)
     L.seed_everything(seed)

fusion_bench/utils/lazy_state_dict.py CHANGED Viewed

@@ -1,16 +1,20 @@
 import json
 import logging
 import os
-from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple
+from copy import deepcopy
+from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple, Type
 import torch
+from accelerate import init_empty_weights
 from accelerate.utils.constants import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
 from huggingface_hub import snapshot_download
 from safetensors import safe_open
 from safetensors.torch import load_file
+from torch import nn
 from transformers import AutoConfig
 from fusion_bench.utils.dtype import parse_dtype
+from fusion_bench.utils.packages import import_object
 if TYPE_CHECKING:
     from transformers import PretrainedConfig
@@ -51,14 +55,19 @@ class LazyStateDict:
     """
     _local_path: str
+    """local path to the checkpoint."""
     _state_dict_cache: Optional[Dict]
+    """Cache for the state dict, if enabled."""
     _index_filename: Optional[str]
     _checkpoint_files: Optional[List[str]]
-    _index: Optional[Dict]
+    _index: Optional[Dict[str, str]]
+    """Mapping of parameter names to checkpoint files."""
     def __init__(
         self,
         checkpoint: str,
+        meta_module_class: Optional[Type[nn.Module]] = None,
+        meta_module: Optional[nn.Module] = None,
         cache_state_dict: bool = False,
         torch_dtype: Optional[torch.dtype] = None,
         device: str = "cpu",
@@ -66,6 +75,37 @@ class LazyStateDict:
         hf_cache_dir: Optional[str] = None,
         hf_proxies: Optional[Dict] = None,
     ):
+        """
+        Args:
+            checkpoint (str): Path to the checkpoint file or directory.
+            meta_module_class (Type[nn.Module], optional): Class of the meta module to instantiate.
+            meta_module (nn.Module, optional): Pre-initialized meta module.
+            cache_state_dict (bool): Whether to cache the state dict in memory.
+            torch_dtype (torch.dtype, optional): The dtype to use for the tensors.
+            device (str): The device to load the tensors onto.
+            hf_revision (str, optional): The revision of the model to download from Hugging Face Hub.
+            hf_cache_dir (str, optional): The cache directory for Hugging Face models.
+            hf_proxies (Dict, optional): Proxies to use for downloading from Hugging Face Hub.
+        """
+        self.cache_state_dict = cache_state_dict
+        self.meta_module_class = meta_module_class
+        if isinstance(self.meta_module_class, str):
+            self.meta_module_class = import_object(self.meta_module_class)
+        self.meta_module = meta_module
+        if self.meta_module_class is not None:
+            if self.meta_module is not None:
+                raise ValueError(
+                    "Cannot provide both meta_module_class and meta_module, please provide only one."
+                )
+            with init_empty_weights():
+                self.meta_module = self.meta_module_class.from_pretrained(
+                    checkpoint,
+                    torch_dtype=torch_dtype,
+                    revision=hf_revision,
+                    cache_dir=hf_cache_dir,
+                    proxies=hf_proxies,
+                )
         self._checkpoint = checkpoint
         self._local_path = resolve_checkpoint_path(
             checkpoint,
@@ -78,10 +118,44 @@ class LazyStateDict:
             self._resolve_checkpoint_files(self._local_path)
         )
-        if cache_state_dict:
-            self._state_dict_cache = {}
+        if self._index is not None:
+            # if meta_module is provided, remove the keys that are not in the meta_module
+            if self.meta_module is not None:
+                meta_module_state_dict = self.meta_module.state_dict()
+                for key in tuple(self._index.keys()):
+                    if key not in meta_module_state_dict:
+                        self._index.pop(key)
+            if cache_state_dict:
+                self._state_dict_cache = {}
+            else:
+                self._state_dict_cache = None
+        elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
+            SAFE_WEIGHTS_NAME
+        ):
+            # let the keys of self._index be the keys of the state dict, the values are the checkpoint file
+            with safe_open(
+                self._checkpoint_files[0], framework="pt", device=device
+            ) as f:
+                self._index = {key: self._checkpoint_files[0] for key in f.keys()}
+                if cache_state_dict:
+                    self._state_dict_cache = {}
+                else:
+                    self._state_dict_cache = None
+        elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
+            WEIGHTS_NAME
+        ):
+            log.info(f"Loading full state dict from {WEIGHTS_NAME}")
+            self._state_dict_cache = torch.load(self._checkpoint_files[0])
+            # if meta_module is provided, remove the keys that are not in the meta_module
+            if self.meta_module is not None:
+                meta_module_state_dict = self.meta_module.state_dict()
+                for key in tuple(self._state_dict_cache.keys()):
+                    if key not in meta_module_state_dict:
+                        self._state_dict_cache.pop(key)
         else:
-            self._state_dict_cache = None
+            raise ValueError(
+                f"Cannot determine the type of checkpoint, please provide a checkpoint path to a file containing a whole state dict with file name {WEIGHTS_NAME} or {SAFE_WEIGHTS_NAME}, or the index of a sharded checkpoint ending with `.index.json`."
+            )
         self._torch_dtype = parse_dtype(torch_dtype)
         self._device = device
@@ -94,7 +168,11 @@ class LazyStateDict:
     def config(self) -> "PretrainedConfig":
         return AutoConfig.from_pretrained(self._checkpoint)
-    def state_dict(self) -> "LazyStateDict":
+    def state_dict(self, keep_vars: bool = False) -> "LazyStateDict":
+        """
+        Args:
+            keep_vars (bool): Ignored, as LazyStateDict does not support keep_vars. Just for compatibility.
+        """
         return self
     def _resolve_checkpoint_files(self, checkpoint: str):
@@ -152,6 +230,8 @@ class LazyStateDict:
             checkpoint_files = [
                 os.path.join(checkpoint_folder, f) for f in checkpoint_files
             ]
+        else:
+            index = None
         return index, index_filename, checkpoint_files
     def _load_tensor_from_checkpoint_file(
@@ -210,6 +290,21 @@ class LazyStateDict:
             )
             return tensor
+    def __setitem__(self, key: str, value: torch.Tensor) -> None:
+        """
+        Set a tensor in the LazyStateDict. This will update the state dict cache if it is enabled.
+        """
+        assert key in list(
+            self.keys()
+        ), "KeyError: Cannot set a tensor for a key that does not exist in the LazyStateDict."
+        if self._state_dict_cache is not None:
+            self._state_dict_cache[key] = value
+        else:
+            log.warning(
+                "State dict cache is disabled, setting a tensor will not update the cache."
+            )
+            self._state_dict_cache = {key: value}
     def __contains__(self, key: str) -> bool:
         if self._state_dict_cache is not None and key in self._state_dict_cache:
             return True
@@ -248,21 +343,68 @@ class LazyStateDict:
     def __iter__(self) -> Iterator[str]:
         if self._index is not None:
             return iter(self._index)
-        return iter(self._checkpoint_files)
+        elif self._state_dict_cache is not None:
+            return iter(self._state_dict_cache)
+        else:
+            raise RuntimeError(
+                "Unexpected error: cannot determine the keys in the state dict."
+            )
-    def keys(self) -> List[str]:
-        return list(self)
+    def keys(self) -> Iterator[str]:
+        for key in self:
+            yield key
-    def values(self) -> List[torch.Tensor]:
-        return [self[key] for key in self]
+    def values(self) -> Iterator[torch.Tensor]:
+        for key in self:
+            yield self[key]
     def items(self) -> Iterator[Tuple[str, torch.Tensor]]:
-        return ((key, self[key]) for key in self)
+        for key in self:
+            yield key, self[key]
     def __repr__(self) -> str:
         if self._index is not None:
-            return f"{self.__class__.__name__}(index={self._index})"
+            return f"{self.__class__.__name__}(keys={list(self.keys())})"
         else:
             return (
                 f"{self.__class__.__name__}(checkpoint_files={self._checkpoint_files})"
             )
+    def get_parameter(self, target: str) -> torch.Tensor:
+        return self[target]
+    def get_submodule(self, target: str) -> nn.Module:
+        if self.meta_module is not None:
+            module: nn.Module = deepcopy(self.meta_module.get_submodule(target))
+            module.to_empty(device=self._device)
+            state_dict = {}
+            for name, _ in module.named_parameters():
+                state_dict[name] = self[f"{target}.{name}"]
+            module.load_state_dict(state_dict)
+            return module
+        else:
+            raise RuntimeError(
+                "Cannot get submodule because meta_module is not provided."
+            )
+    def load_state_dict(
+        self, state_dict: Dict[str, torch.Tensor], strict: bool = True
+    ) -> None:
+        """
+        Load a state dict into this LazyStateDict.
+        This method is only for compatibility with nn.Module and it overrides the cache of LazyStateDict.
+        Args:
+            state_dict (Dict[str, torch.Tensor]): The state dict to load.
+            strict (bool): Whether to enforce that all keys in the state dict are present in this LazyStateDict.
+        """
+        log.warning(
+            "Loading state dict into LazyStateDict is not recommended, as it may lead to unexpected behavior. "
+            "Use with caution."
+        )
+        if strict:
+            for key in state_dict:
+                if key not in self:
+                    raise KeyError(f"Key {key} not found in LazyStateDict.")
+        for key, value in state_dict.items():
+            self[key] = value

fusion_bench/utils/misc.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from typing import Iterable, List
-__all__ = ["first", "has_length", "join_list"]
+__all__ = ["first", "has_length", "join_list", "attr_equal"]
 def first(iterable: Iterable):
@@ -23,3 +23,21 @@ def join_list(list_of_list: List[List]):
     for item in list_of_list:
         ans.extend(item)
     return ans
+def attr_equal(obj, attr: str, value):
+    """
+    Check if the attribute of the object is equal to the given value.
+    Returns False if the attribute does not exist or is not equal to the value.
+    Args:
+        obj: The object to check.
+        attr (str): The attribute name to check.
+        value: The value to compare against.
+    Returns:
+        bool: True if the attribute exists and is equal to the value, False otherwise.
+    """
+    if not hasattr(obj, attr):
+        return False
+    return getattr(obj, attr) == value

fusion_bench/utils/pylogger.py CHANGED Viewed

@@ -62,6 +62,8 @@ class RankZeroLogger(logging.Logger):
     def _log(self, *args, **kwargs):
         if "stacklevel" in kwargs:
             kwargs["stacklevel"] += 1
+        else:
+            kwargs["stacklevel"] = 2
         return super()._log(*args, **kwargs)
     def is_global_zero(self):

{fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.17
+Version: 0.2.19
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License

{fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-fusion_bench/__init__.py,sha256=68dF-zPvb8E2MgYnmgIJsxIHJBy1MApKeOrRZvQEVlg,421
+fusion_bench/__init__.py,sha256=vu3nMzKuiiHkbH13m8SOzj8qYU-n1PreBipWs_xjZig,1937
 fusion_bench/__main__.py,sha256=weUjxpP3ULnDgUxCehdbmoCM9cqfkhDhGB85tAF5qoE,81
 fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/compat/method/__init__.py,sha256=A9pbskEYB4_ryD6sVrR0qI4eVYsbI7sExbhPeypP3fQ,5757
@@ -12,6 +12,7 @@ fusion_bench/compat/taskpool/base_pool.py,sha256=1AIZBxqUJgshq0Xo3Yo9es4b-8X8ksN
 fusion_bench/compat/taskpool/clip_image_classification.py,sha256=ZYZsbsE-fPzm6yafA0p-6wcDwVGryLmtXXtuEXeQbTY,7425
 fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py,sha256=JsdAE72V1C1eDcA1WCa0PIcSDTrGPclNKFDQ9G-hYts,5786
 fusion_bench/constants/__init__.py,sha256=Pyc4dLbl6oNduOCdnpeXQ9LDyVoIrkdl9eZ_l2axIv8,41
+fusion_bench/constants/banner.py,sha256=fuIO36ETKlS6a3wbwZn-rA2OswSCfOYyyhZ0Fnal1s4,1656
 fusion_bench/constants/clip_vision.py,sha256=k0NRfiacxRaswdxUj91-e3jcP1u-RmvsaaYdqohcQVU,310
 fusion_bench/constants/paths.py,sha256=DVZyQ9FLhkyUdw6ARpXUCAMf_B8hFyJ6UNI-oYly3pE,591
 fusion_bench/dataset/__init__.py,sha256=OJiYmcqz0Vm5O7mE4PB5QFJeL_KjrsseQTRsQATGTm4,1050
@@ -42,12 +43,12 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
 fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
 fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
 fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/method/__init__.py,sha256=xry6_2sAWT_qeNFgcLTE7lBWWWjGhuljrJFeWL1NBXg,7552
+fusion_bench/method/__init__.py,sha256=TMELBu1IdKN86Id1rlNlr-vqsdArti_6mlKLfobHoL4,7888
 fusion_bench/method/base_algorithm.py,sha256=UuITuGnSskcKEwUVINuPoWJUwqGm9AIgyQIOCu8BMks,1162
 fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
 fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
 fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
-fusion_bench/method/simple_average.py,sha256=vVzlfdf0mPHeY3VeOLrcWI4sWoLBW0gaX0lusjePVyQ,4539
+fusion_bench/method/simple_average.py,sha256=A_VYtHhECcxY0_Mppe5ThOY-ip6XUvvtPHsaQKSmDPc,4971
 fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
 fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
 fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
@@ -88,6 +89,12 @@ fusion_bench/method/doge_ta/__init__.py,sha256=dixO0i5fmhgC_W2_DAQ4PzYnkMCZX5D8t
 fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
 fusion_bench/method/doge_ta/doge_ta.py,sha256=ec0qIq3F72nhbCVlfqdk1PYFM7QIlfMofeVFVvmDKiE,13785
 fusion_bench/method/doge_ta/layer_wise_adamerging.py,sha256=rLk3Nep5d6wMUNCp6q7pC7L0pfBvUwGBIuiGM7CQOf4,9780
+fusion_bench/method/expert_sparsity/__init__.py,sha256=nt7k5cKqA2Bax1aM93ODwsEuibZ_hdFgQsUos_8h2v8,271
+fusion_bench/method/expert_sparsity/mixtral/__init__.py,sha256=FyKDZIyYUnqvGIdJ5BS639UpzSBj11g28ATHs1Yczdk,545
+fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py,sha256=e4fsXKSjCdmK-sThX6REk_d1hf-UolRLssQr7b6jD-M,5597
+fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py,sha256=GJVIose_Duk4C6Re4LtaxSzGjR8XLGGlhLhsGMECwjw,4960
+fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py,sha256=-0qWYkvHqKouJynn-kT907JQtiMLChtppOTL4SUYR9M,5090
+fusion_bench/method/expert_sparsity/utils/calibration_data.py,sha256=jEWW60qXrnAyiAPz8gbpvQ4hFeL1P1ykoIzoydAaDAk,5459
 fusion_bench/method/fisher_merging/__init__.py,sha256=KWsjrtxKkPYwcUA5rB_6UNIqvesqk2NJw5AY_1ztLVE,225
 fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWETPZsCChqLRAhvfJp4QKD9TGSpTyV0,7635
 fusion_bench/method/fisher_merging/fisher_merging.py,sha256=OiceW0bqvnzGjIyIjd0A55ckXImDfEvi-Nk6td0sFFw,20892
@@ -108,11 +115,13 @@ fusion_bench/method/gossip/utils.py,sha256=ggMPRdxs--U2sV670oimX7jo8NGBX5Oq8Mlpr
 fusion_bench/method/isotropic_merging/__init__.py,sha256=yyx1Exfrf_4CtTjml1CIplFeeEDsSUk2Zc0AJ98ST9M,584
 fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4Rd0WcEPsYvQhBSCg,3773
 fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
+fusion_bench/method/knots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fusion_bench/method/knots/knots_utils.py,sha256=NWlzo4nhQypUcNknU832MP3QT42VsLx-6WQ9QXuSigw,795
 fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
 fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
 fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
 fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
-fusion_bench/method/linear/simple_average_for_llama.py,sha256=7JlVrmTMmrePvNGnZNoxSuCSq2Vu7cPQzjGC3WWUXBE,2079
+fusion_bench/method/linear/simple_average_for_llama.py,sha256=OcjvfG5nuUzdo3P4Xi1mO6ApRu51YAUYXG5lAMeD6rg,2711
 fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
 fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
 fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=ys_td1IeL3bzPTE0Cixlj2JooCaB7qseRwSDwroAk5A,18777
@@ -199,7 +208,8 @@ fusion_bench/method/task_singular_vector/TSVM.py,sha256=Sdgoi8xT0Hl19pmGdIuUS3D1
 fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
 fusion_bench/method/task_singular_vector/utils/TSVC_utils.py,sha256=FytKbal48EW6iGIA-2zV7QSVbYTVflXr4Mr56q0W75k,2286
 fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=WGM8wCICdGsNVpceHamQytZi-q4wzrCmGGQCYOm67mI,29146
-fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Pgthb9Ld1x0Qis1wKWottwgzlBcyuzByFZCMIoI6Fys,240
+fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Mep62TnXJscBEFZ6QDsI28cWmfygt8EPwjQdfUJzEZQ,315
+fusion_bench/method/task_singular_vector/utils/task_singular_interference.py,sha256=tXsFwx8eomzu00nSp95CjjWZX82zq32ff2Q6VM_29CM,1348
 fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
 fusion_bench/method/ties_merging/ties_merging.py,sha256=GAlomW4oTePXd57TvogQXoliNnEto1_QVXVGVrU1QNc,5807
 fusion_bench/method/ties_merging/ties_merging_utils.py,sha256=EZyltS9hUM8NmcvXjAqhBpj-ucMlMtR95082kPDsJPU,10296
@@ -243,7 +253,7 @@ fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RP
 fusion_bench/modelpool/lazy_state_dict_pool.py,sha256=HtEA85rqSCHfsIddI5sKDcZf5kSuHNwrb8fF1TUSTr0,652
 fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
 fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
-fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=fO8lF8YWwoe43sVVOqHW9Ike7x-924-I6QQgZqx9EgA,6505
+fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=7-mUWVGVsXyljH_06CmIyReClKx_xVjy5zeXTJcLQIk,8085
 fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
 fusion_bench/modelpool/clip_vision/modelpool.py,sha256=ADgzslXwYd95x42V26XvgS09WEKGfhH_AYuQmWKdT0w,5887
 fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
@@ -253,7 +263,7 @@ fusion_bench/modelpool/seq2seq_lm/modelpool.py,sha256=IjLHi8qycWOA4Ul9jnqR48evgV
 fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=k-t4RetcDlbkRkPHNuyeV3pQEcJnFRjd9Wp5tFBb-G8,128
 fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
 fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIKAmGJwfrNSuWtxzJ_-ME0gQksEYY2y-jVt7P82Qs0,3434
-fusion_bench/models/__init__.py,sha256=TNOEH_2yAQP51m9mdWepNEf9VGUZgDthtgXbs4rhb4M,100
+fusion_bench/models/__init__.py,sha256=w2QbRl-nIHMHNCl9X46f2CD6oqZfEDAxGRs4G9cw2nw,145
 fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
 fusion_bench/models/parameter_dict.py,sha256=RBAXZ-PFLxy3eHxQqWLEvjKIR1uTHBWdKP0XXMNGmQg,3635
 fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
@@ -264,6 +274,11 @@ fusion_bench/models/we_moe.py,sha256=0U-m3mhzb4vFLIzn2jd7j_SQOF9lot4ddzq0l_VPp9g
 fusion_bench/models/chat_templates/__init__.py,sha256=v9vKrCfBgZ3UsMBQatZv1Z-ayPualBl5ciV0aO3p3iY,85
 fusion_bench/models/chat_templates/llama_3_Instruct.py,sha256=E6grNPECr0r1KDPIGW_DmpKQw5-Dh5WbMiTaHWDXwXo,4008
 fusion_bench/models/chat_templates/load_tokenizer.py,sha256=yRs3dB2tZo0Oh-YLJcMZzWSQ5Ps8KXrggZNb5F-aBuM,1400
+fusion_bench/models/expert_sparsity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+fusion_bench/models/expert_sparsity/mixtral/__init__.py,sha256=3L_dcXW3op6ichd3GTlrTEZF_UA57Pyr13SlQRer7lg,439
+fusion_bench/models/expert_sparsity/mixtral/dataset.py,sha256=1-OxRbK-TRaQBJuOfnuzQKSV_55mMRV6iqKWBuX5BIM,1350
+fusion_bench/models/expert_sparsity/mixtral/modeling_mixtral.py,sha256=uGbn69toZ3ldHZKfwXNBijjcPQXeDdXpwJv3HnVwUbc,8252
+fusion_bench/models/expert_sparsity/mixtral/wrapper.py,sha256=1zACEwXDNbi9uwI96oD84YrCsh6b8yh25ZjP3q37muo,10167
 fusion_bench/models/linearized/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/models/linearized/linearized_model_utils.py,sha256=5yKXReQHIwDttzT_oXwY_iIpaG1zIU0Nv93BWmmOqrg,3212
 fusion_bench/models/linearized/vision_model.py,sha256=HhbhtyoLD1qVvh1Sgl_beYF2W7AvMevmUy4Jx2XlcsY,4636
@@ -326,7 +341,7 @@ fusion_bench/optim/lr_scheduler/utils/__init__.py,sha256=GfZk9VYL3cFE1Qy2xQpGc1G
 fusion_bench/optim/lr_scheduler/utils/visualization.py,sha256=Ea1n9ElNizAe0iUnjynyfteuZunv2-UBMN_NfEU2imA,3490
 fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31AsmvwvNvJw,508
 fusion_bench/programs/base_program.py,sha256=0dX_KcMWASo53pr-ldzfUBWIjEXy6oeDWZBrfc7FIk8,195
-fusion_bench/programs/fabric_fusion_program.py,sha256=r-CuvS_OxADXjQgqNm2E_poSvIx1GCMjcyRCMWrwU1w,13427
+fusion_bench/programs/fabric_fusion_program.py,sha256=978t9Fw9kvw-Il7rJLR2jNI1OfSxkhq1c5-5D4BgnYU,13813
 fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/scripts/cli.py,sha256=hw32XtmixFxYXwgAY7iRBMzma_XQjdf_FxPiXKL6dIc,1154
 fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
@@ -370,7 +385,7 @@ fusion_bench/tasks/clip_classification/fer2013.py,sha256=_oc2fdV308ywcb16rLZxBAd
 fusion_bench/tasks/clip_classification/flower102.py,sha256=p_JMs6HCCPZBKe7PTXt0WABsd-KcgmpBkxDSlJJaVVY,2096
 fusion_bench/tasks/clip_classification/food101.py,sha256=Oepvws5byGxrHswXt3ILG3UEPiZaFXYqK1yJqm1uYVE,1968
 fusion_bench/tasks/clip_classification/gtsrb.py,sha256=Dsaz-XNz6oA9nNTF2C2iXmmhhVz-gsw-WcGuFTqjzl4,2677
-fusion_bench/tasks/clip_classification/imagenet.py,sha256=Az7gnFuecVCDkP3mMjiLwOgrYAf_cxz177kkdivComU,38815
+fusion_bench/tasks/clip_classification/imagenet.py,sha256=EhZ2iYAc8oApr5BU_vgM3cDY879anTkvb-5hfi-B7m4,48826
 fusion_bench/tasks/clip_classification/kmnist.py,sha256=Ohce6aVaXkPnviDaZYXANMhhBNHZXO3FnXYxYG-ISVg,311
 fusion_bench/tasks/clip_classification/mnist.py,sha256=-gQpHz_kCXmUOtAsM8FBUYFjlwcbAgnqpuVtRfCJ3JM,129
 fusion_bench/tasks/clip_classification/mongo_leaf_disease.py,sha256=L_2IgnzbZdGZrX27VNGu1rC-N3Aj4fetIXB9HM1QZkI,519
@@ -389,26 +404,26 @@ fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py,sha256=-B1wqVGp3wZ
 fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py,sha256=sVihXHbqwi8IlDpiIxzvmDv-Ob7WKvi23GIRYbBUKOc,1833
 fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py,sha256=GhRmGmcJGF4oVgZQarsBtx8GNKrNEZUkrillNz3iBuY,13183
 fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py,sha256=mKMTXIr5o-BqS_Hvv1bbMvvjQLLeKNVw7BKS9qgQ8Dw,1890
-fusion_bench/utils/__init__.py,sha256=E_K0a1V761KJCn623tL23QpqcnngIcLYo_6WK8Y0Xtc,447
+fusion_bench/utils/__init__.py,sha256=XbmQGNmzVKnPLodevlM15iEIXCFx3hled7Vni4fzPYc,504
 fusion_bench/utils/auto.py,sha256=uACQLE62_kNyhl4BGduvcbyeTE61qXpIJx3Ccl8kh68,920
 fusion_bench/utils/cache_utils.py,sha256=rU8x4-RFUtaCZWKd4Kft_7xgPTr1bpXnqUDMkrIdpj8,1653
 fusion_bench/utils/data.py,sha256=L3aS2OwlpiXoILdPlo-j03gJh4s2LpAJw6fw9uY5G7c,6571
 fusion_bench/utils/devices.py,sha256=MIAxbEGinN-QU4W1g3-YKkJsteHQrwhbLqkmbzX1W3U,8035
 fusion_bench/utils/dict.py,sha256=ZCK0CRRT_B1Z18WY_GOYcmth7k5x9Jn1k7XhAVWRu98,1379
-fusion_bench/utils/dtype.py,sha256=kYoEGqsXitnwOU3W7ivqhQ0OjdI7MGu1VsyMJS4cSyQ,4299
+fusion_bench/utils/dtype.py,sha256=qtsDFfm5XTuxsjvVg-orpWvbhebCvyivzzZbLg-xiaA,4327
 fusion_bench/utils/expr.py,sha256=zwHNrtIbOMnIChU-0ZI5qLbDva8zvHbizL-4F2TwM14,2386
-fusion_bench/utils/fabric.py,sha256=X2B_QPT2kqDPceQo3tp4XYAKbBpIs07w94Je_h2_81w,355
+fusion_bench/utils/fabric.py,sha256=dF0Aj8NmVir30io6WcL5gpWmbQSPlEADvw_yFxFx1sQ,613
 fusion_bench/utils/functools.py,sha256=7_tYJ2WD88_2DDuOOj5aZz3cYuslYH5tsVyIgCeLtmk,1318
 fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqkoSGk,1174
 fusion_bench/utils/instantiate_utils.py,sha256=57D8YP25OO-ArltOSsHDKtnNcA44m1yAq-1wKZc2YVI,17523
 fusion_bench/utils/json.py,sha256=sVCqbm9mmyHybiui-O57KFt_ULrjLtN2wipSo6VDvqE,2533
 fusion_bench/utils/lazy_imports.py,sha256=v5l9cpHXPMaz1IVBmB5oOqefYr9vA3XvP340xT7Wy18,2796
-fusion_bench/utils/lazy_state_dict.py,sha256=0KBd3j6A_T_9-m8t68tSDpQZB_MWk9-cwho3O_8PkXY,10150
-fusion_bench/utils/misc.py,sha256=Rgec7eKcGIcp9BaFVdm2pzx0J-L8AyX5qWuiYNTGvTc,530
+fusion_bench/utils/lazy_state_dict.py,sha256=Hu8PkhbJcUikXJxWUJ7vabu2uDbnUUF6UsRS0k8i71U,16841
+fusion_bench/utils/misc.py,sha256=Qc3_H8UMooOp81Ow89zqvM1sNPIybq1cbq7s4-4lsfU,1082
 fusion_bench/utils/packages.py,sha256=L64paDi1SmeT3gRvRV6LaqB8AeGdzIYWIRI31qSQbSk,2110
 fusion_bench/utils/parameters.py,sha256=2vs8vo2o-nRA9NOMOYFye-X8-aHQZoYe54tM6n0r0RE,11757
 fusion_bench/utils/path.py,sha256=hRA1CPHNnTYBUmzbftH77sHvn4aTuybEK5Tth1skP-k,531
-fusion_bench/utils/pylogger.py,sha256=05gF2DNtdQG_Ldw029ufj4_IprBpciMVOznwpgaJUpI,3282
+fusion_bench/utils/pylogger.py,sha256=amlRsdqHpOjxmBl6f9TA8y0LaWelEWgQNcGgEGsVOIc,3333
 fusion_bench/utils/rich_utils.py,sha256=B8DhAYuVp23pG6ZnnYrUhcL-ikHZoQeTNqlM7u4pwwU,5786
 fusion_bench/utils/set.py,sha256=_43ZvGKJ_BK9sUslsSNhi7xEfuAQuyj3vViImnGpnCY,134
 fusion_bench/utils/state_dict_arithmetic.py,sha256=iz5YYhMJpg2-lBLBY8E1onV4i_GkRhJOGn2DjhLBbYE,11390
@@ -422,7 +437,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
 fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
 fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
 fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
-fusion_bench-0.2.17.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
+fusion_bench-0.2.19.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
 fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
 fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
 fusion_bench_config/fabric_model_fusion.yaml,sha256=YwJx_aUXm4ca4_mVItKVUOesMvmBBRGudQIOqgc1EP8,974
@@ -525,6 +540,7 @@ fusion_bench_config/fabric/llama_ddp.yaml,sha256=bOOuK5BPKmScE6yh5xY59qlawlMk2sR
 fusion_bench_config/fabric/llama_fsdp.yaml,sha256=pTvz0k79dSOVAAlvU0T1kNd8TNCwz2FGjDOujBtQ_Ks,574
 fusion_bench_config/fabric/llama_peft_fsdp.yaml,sha256=AosSmY4624iahKbTWY681BsZTC1ul78x9aHZ9zHS81s,579
 fusion_bench_config/fabric/loggers/csv_logger.yaml,sha256=Pv8I-xbxrpTb_fwtDiUtCAEoCZ8QYCLu2GeJNzb3Z3c,373
+fusion_bench_config/fabric/loggers/mlflow_logger.yaml,sha256=iu_3Y57hRuc-FjJGoTDlcRqxq3K6U2vHBaBvhOPp8hk,71
 fusion_bench_config/fabric/loggers/tensorboard_logger.yaml,sha256=w9ZP1i8lRYQFslzEM98PmbcFhhn5dXReSJhLOdEi-do,381
 fusion_bench_config/fabric/loggers/wandb_logger.yaml,sha256=eF4slc6QPRuMCMJVeFHNJirsGiB15WQIxNgioXNwezc,142
 fusion_bench_config/fabric/strategy/deepspeed.yaml,sha256=zcSUeHVaATy92oTTRx3_hWQkCB3BPR7YOIt_U1gimCU,343
@@ -567,6 +583,8 @@ fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=CtZI3YPMJNDy225yhOJbSiMKl
 fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=khdpCvKMNytx4nZSgtUJFXv44MVytXu0aqUVd9TixXo,57
 fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=Ih9dqifpnvxW2QfJqp8Q8S8W1k7VZG9ulyPxkcuaWsw,54
 fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=2KD3PjFglqL7fjqhjXtOWxZ1mvmYodiNVroXsFd7EGE,261
+fusion_bench_config/method/expert_sparsity/README.md,sha256=CLE0-XblXDWCUTHPaTNtBH-YquXn-uawwTJiYrgjMaA,239
+fusion_bench_config/method/expert_sparsity/mixtral.yaml,sha256=maFL3LM0zfnQ1eXoNXUslSjgZmpOdUJgl_a31dYUBbc,605
 fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml,sha256=rl7kfVvdo2pG-DnglQUbjzkyBqnq1FpfoSDSjFtdLwk,633
 fusion_bench_config/method/fisher_merging/fisher_merging.yaml,sha256=B1wrv9mhaOID4KcAUEMZNxlvY3tR3Q3UGualFslvx-Y,475
 fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml,sha256=AE7XZqRDj4__J_ipEcjPs7qTB2J3xLQyFRlq1W4iHFE,563
@@ -778,6 +796,7 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_
 fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml,sha256=yC2U_IoBAhawgSahY_mdi7ea5kJ2SSRPJ2FM-bA-E9M,510
 fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml,sha256=a2nviqKSRNoQScYVbj5buq0PbUzmYJwNWdPBUoLaeV8,386
 fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=G6yvZuWOKb75RLn6tu2LPnwHUyvoxPfL_wqb_B11aZo,549
+fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml,sha256=HZXjqbZKpSZCHb-G8qjj03PcvXg_8mrAuewDHZp0oEw,263
 fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml,sha256=8gr8ZtgegSHV0GHtJBiEgdYbRe8UHhO4_y8dayxZChk,506
 fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=oDsZkuAoh1mWUC7jZNzw8794zgX2bV5Z0esXpvbTs-c,643
 fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk3_bQiciMRlNsO5vp6AKHQM_-g-8bmU8251w,641
@@ -858,8 +877,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=3q-KMuFaM
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
-fusion_bench-0.2.17.dist-info/METADATA,sha256=cBTM1-Dfm6gdMfQ6vqrxpg7o5abvCwn3b1zb4KUSgHY,21966
-fusion_bench-0.2.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-fusion_bench-0.2.17.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.17.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.17.dist-info/RECORD,,
+fusion_bench-0.2.19.dist-info/METADATA,sha256=5pl4dtlAYklMMiMLBeKNaHqCQRd7sLSct7aIh9JIoGY,21966
+fusion_bench-0.2.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+fusion_bench-0.2.19.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.19.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.19.dist-info/RECORD,,

fusion_bench_config/fabric/loggers/mlflow_logger.yaml ADDED Viewed

@@ -0,0 +1,2 @@
+# https://mlflow.org/
+_target_: lightning.pytorch.loggers.MLFlowLogger

fusion_bench_config/method/expert_sparsity/README.md ADDED Viewed

@@ -0,0 +1,6 @@
+Original repo: https://github.com/Lucky-Lance/Expert_Sparsity
+Reference:
+    Not All Experts are Equal: Efficient Expert Pruning and Skipping for Mixture-of-Experts Large Language Models.
+    ACL 2024.
+    http://arxiv.org/abs/2402.14800

fusion_bench_config/method/expert_sparsity/mixtral.yaml ADDED Viewed

@@ -0,0 +1,17 @@
+_target_: fusion_bench.method.LayerWisePruningForMixtral
+num_preserved_experts: 4
+# c4 or math
+# corresponding to the keys of `fusion_bench.method.expert_sparsity.utils.calibration_data.DATASETS`
+calib_set: c4
+# Maximal sequence length of each sample in calibration set
+max_block_size: 2048
+# Number of sequences in calibration set. If set to 0 or negative, the whole dataset will be used
+n_blocks_for_stat: 128
+# Batch size for model inference
+batch_size: 8
+# Number of workers in dataloader
+num_workers: 8
+# Random seed
+seed: 42
+# Path to save the pruned model
+model_save_path: "{log_dir}/pruned_model"

fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+_recursive_: false
+load_lazy: false
+models:
+  _pretrained_: Qwen/Qwen2.5-1.5B
+  expert_1: Qwen/Qwen2.5-Math-1.5B
+  expert_2: Qwen/Qwen2.5-Coder-1.5B
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: Qwen/Qwen2.5-1.5B

{fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/WHEEL RENAMED Viewed

File without changes

{fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/top_level.txt RENAMED Viewed

File without changes

fusion-bench 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

fusion-bench 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl