fusion-bench 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. fusion_bench/__init__.py +6 -0
  2. fusion_bench/constants/banner.py +12 -0
  3. fusion_bench/method/__init__.py +11 -0
  4. fusion_bench/method/expert_sparsity/__init__.py +10 -0
  5. fusion_bench/method/expert_sparsity/mixtral/__init__.py +23 -0
  6. fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py +175 -0
  7. fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py +159 -0
  8. fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py +173 -0
  9. fusion_bench/method/expert_sparsity/utils/calibration_data.py +153 -0
  10. fusion_bench/method/knots/__init__.py +0 -0
  11. fusion_bench/method/knots/knots_utils.py +23 -0
  12. fusion_bench/method/linear/simple_average_for_llama.py +17 -3
  13. fusion_bench/method/simple_average.py +10 -0
  14. fusion_bench/method/task_singular_vector/utils/__init__.py +1 -0
  15. fusion_bench/method/task_singular_vector/utils/task_singular_interference.py +41 -0
  16. fusion_bench/modelpool/causal_lm/causal_lm.py +45 -11
  17. fusion_bench/models/__init__.py +1 -0
  18. fusion_bench/models/expert_sparsity/__init__.py +0 -0
  19. fusion_bench/models/expert_sparsity/mixtral/__init__.py +15 -0
  20. fusion_bench/models/expert_sparsity/mixtral/dataset.py +40 -0
  21. fusion_bench/models/expert_sparsity/mixtral/modeling_mixtral.py +207 -0
  22. fusion_bench/models/expert_sparsity/mixtral/wrapper.py +268 -0
  23. fusion_bench/programs/fabric_fusion_program.py +12 -8
  24. fusion_bench/tasks/clip_classification/imagenet.py +1008 -2004
  25. fusion_bench/utils/__init__.py +3 -2
  26. fusion_bench/utils/dtype.py +2 -1
  27. fusion_bench/utils/fabric.py +11 -4
  28. fusion_bench/utils/lazy_state_dict.py +155 -13
  29. fusion_bench/utils/misc.py +19 -1
  30. fusion_bench/utils/pylogger.py +2 -0
  31. {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/METADATA +1 -1
  32. {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/RECORD +40 -21
  33. fusion_bench_config/fabric/loggers/mlflow_logger.yaml +2 -0
  34. fusion_bench_config/method/expert_sparsity/README.md +6 -0
  35. fusion_bench_config/method/expert_sparsity/mixtral.yaml +17 -0
  36. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +11 -0
  37. {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/WHEEL +0 -0
  38. {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/entry_points.txt +0 -0
  39. {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/licenses/LICENSE +0 -0
  40. {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/top_level.txt +0 -0
@@ -2,14 +2,15 @@
2
2
  import importlib
3
3
  from typing import Iterable
4
4
 
5
- from . import data, functools, path
5
+ from . import data, functools, path, pylogger
6
6
  from .cache_utils import *
7
7
  from .devices import *
8
8
  from .dtype import parse_dtype
9
9
  from .fabric import seed_everything_by_time
10
10
  from .instantiate_utils import instantiate, is_instantiable
11
+ from .json import load_from_json, save_to_json
12
+ from .lazy_state_dict import LazyStateDict
11
13
  from .misc import *
12
14
  from .packages import import_object
13
15
  from .parameters import *
14
16
  from .timer import timeit_context
15
- from .lazy_state_dict import LazyStateDict
@@ -13,6 +13,7 @@ from transformers.utils import (
13
13
  PRECISION_STR_TO_DTYPE: Dict[str, torch.dtype] = {
14
14
  "fp16": torch.float16,
15
15
  "float16": torch.float16,
16
+ "half": torch.float16,
16
17
  "bf16": torch.bfloat16,
17
18
  "bfloat16": torch.bfloat16,
18
19
  "float": torch.float32,
@@ -50,7 +51,7 @@ def parse_dtype(dtype: Optional[str]):
50
51
 
51
52
  dtype = dtype.strip('"')
52
53
  if dtype not in PRECISION_STR_TO_DTYPE:
53
- raise ValueError(f"Unsupported dtype: {type(dtype)}")
54
+ raise ValueError(f"Unsupported dtype string: {dtype}")
54
55
 
55
56
  dtype = PRECISION_STR_TO_DTYPE[dtype]
56
57
  return dtype
@@ -1,17 +1,24 @@
1
1
  import time
2
+ from typing import Optional
2
3
 
3
4
  import lightning as L
4
5
 
6
+ from fusion_bench.utils.pylogger import getRankZeroLogger
5
7
 
6
- def seed_everything_by_time(fabric: L.Fabric):
8
+ log = getRankZeroLogger(__name__)
9
+
10
+
11
+ def seed_everything_by_time(fabric: Optional[L.Fabric] = None):
7
12
  """
8
13
  Set seed for all processes by time.
9
14
  """
10
15
  # set seed for all processes
11
- if fabric.is_global_zero:
16
+ if fabric is None or fabric.is_global_zero:
12
17
  seed = int(time.time())
13
18
  else:
14
19
  seed = None
15
- fabric.barrier()
16
- seed = fabric.broadcast(seed, src=0)
20
+ if fabric is not None:
21
+ log.debug(f"Broadcasting seed `{seed}` to all processes")
22
+ fabric.barrier()
23
+ seed = fabric.broadcast(seed, src=0)
17
24
  L.seed_everything(seed)
@@ -1,16 +1,20 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
- from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple
4
+ from copy import deepcopy
5
+ from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple, Type
5
6
 
6
7
  import torch
8
+ from accelerate import init_empty_weights
7
9
  from accelerate.utils.constants import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
8
10
  from huggingface_hub import snapshot_download
9
11
  from safetensors import safe_open
10
12
  from safetensors.torch import load_file
13
+ from torch import nn
11
14
  from transformers import AutoConfig
12
15
 
13
16
  from fusion_bench.utils.dtype import parse_dtype
17
+ from fusion_bench.utils.packages import import_object
14
18
 
15
19
  if TYPE_CHECKING:
16
20
  from transformers import PretrainedConfig
@@ -51,14 +55,19 @@ class LazyStateDict:
51
55
  """
52
56
 
53
57
  _local_path: str
58
+ """local path to the checkpoint."""
54
59
  _state_dict_cache: Optional[Dict]
60
+ """Cache for the state dict, if enabled."""
55
61
  _index_filename: Optional[str]
56
62
  _checkpoint_files: Optional[List[str]]
57
- _index: Optional[Dict]
63
+ _index: Optional[Dict[str, str]]
64
+ """Mapping of parameter names to checkpoint files."""
58
65
 
59
66
  def __init__(
60
67
  self,
61
68
  checkpoint: str,
69
+ meta_module_class: Optional[Type[nn.Module]] = None,
70
+ meta_module: Optional[nn.Module] = None,
62
71
  cache_state_dict: bool = False,
63
72
  torch_dtype: Optional[torch.dtype] = None,
64
73
  device: str = "cpu",
@@ -66,6 +75,37 @@ class LazyStateDict:
66
75
  hf_cache_dir: Optional[str] = None,
67
76
  hf_proxies: Optional[Dict] = None,
68
77
  ):
78
+ """
79
+ Args:
80
+ checkpoint (str): Path to the checkpoint file or directory.
81
+ meta_module_class (Type[nn.Module], optional): Class of the meta module to instantiate.
82
+ meta_module (nn.Module, optional): Pre-initialized meta module.
83
+ cache_state_dict (bool): Whether to cache the state dict in memory.
84
+ torch_dtype (torch.dtype, optional): The dtype to use for the tensors.
85
+ device (str): The device to load the tensors onto.
86
+ hf_revision (str, optional): The revision of the model to download from Hugging Face Hub.
87
+ hf_cache_dir (str, optional): The cache directory for Hugging Face models.
88
+ hf_proxies (Dict, optional): Proxies to use for downloading from Hugging Face Hub.
89
+ """
90
+ self.cache_state_dict = cache_state_dict
91
+ self.meta_module_class = meta_module_class
92
+ if isinstance(self.meta_module_class, str):
93
+ self.meta_module_class = import_object(self.meta_module_class)
94
+ self.meta_module = meta_module
95
+ if self.meta_module_class is not None:
96
+ if self.meta_module is not None:
97
+ raise ValueError(
98
+ "Cannot provide both meta_module_class and meta_module, please provide only one."
99
+ )
100
+ with init_empty_weights():
101
+ self.meta_module = self.meta_module_class.from_pretrained(
102
+ checkpoint,
103
+ torch_dtype=torch_dtype,
104
+ revision=hf_revision,
105
+ cache_dir=hf_cache_dir,
106
+ proxies=hf_proxies,
107
+ )
108
+
69
109
  self._checkpoint = checkpoint
70
110
  self._local_path = resolve_checkpoint_path(
71
111
  checkpoint,
@@ -78,10 +118,44 @@ class LazyStateDict:
78
118
  self._resolve_checkpoint_files(self._local_path)
79
119
  )
80
120
 
81
- if cache_state_dict:
82
- self._state_dict_cache = {}
121
+ if self._index is not None:
122
+ # if meta_module is provided, remove the keys that are not in the meta_module
123
+ if self.meta_module is not None:
124
+ meta_module_state_dict = self.meta_module.state_dict()
125
+ for key in tuple(self._index.keys()):
126
+ if key not in meta_module_state_dict:
127
+ self._index.pop(key)
128
+ if cache_state_dict:
129
+ self._state_dict_cache = {}
130
+ else:
131
+ self._state_dict_cache = None
132
+ elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
133
+ SAFE_WEIGHTS_NAME
134
+ ):
135
+ # let the keys of self._index be the keys of the state dict, the values are the checkpoint file
136
+ with safe_open(
137
+ self._checkpoint_files[0], framework="pt", device=device
138
+ ) as f:
139
+ self._index = {key: self._checkpoint_files[0] for key in f.keys()}
140
+ if cache_state_dict:
141
+ self._state_dict_cache = {}
142
+ else:
143
+ self._state_dict_cache = None
144
+ elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
145
+ WEIGHTS_NAME
146
+ ):
147
+ log.info(f"Loading full state dict from {WEIGHTS_NAME}")
148
+ self._state_dict_cache = torch.load(self._checkpoint_files[0])
149
+ # if meta_module is provided, remove the keys that are not in the meta_module
150
+ if self.meta_module is not None:
151
+ meta_module_state_dict = self.meta_module.state_dict()
152
+ for key in tuple(self._state_dict_cache.keys()):
153
+ if key not in meta_module_state_dict:
154
+ self._state_dict_cache.pop(key)
83
155
  else:
84
- self._state_dict_cache = None
156
+ raise ValueError(
157
+ f"Cannot determine the type of checkpoint, please provide a checkpoint path to a file containing a whole state dict with file name {WEIGHTS_NAME} or {SAFE_WEIGHTS_NAME}, or the index of a sharded checkpoint ending with `.index.json`."
158
+ )
85
159
 
86
160
  self._torch_dtype = parse_dtype(torch_dtype)
87
161
  self._device = device
@@ -94,7 +168,11 @@ class LazyStateDict:
94
168
  def config(self) -> "PretrainedConfig":
95
169
  return AutoConfig.from_pretrained(self._checkpoint)
96
170
 
97
- def state_dict(self) -> "LazyStateDict":
171
+ def state_dict(self, keep_vars: bool = False) -> "LazyStateDict":
172
+ """
173
+ Args:
174
+ keep_vars (bool): Ignored, as LazyStateDict does not support keep_vars. Just for compatibility.
175
+ """
98
176
  return self
99
177
 
100
178
  def _resolve_checkpoint_files(self, checkpoint: str):
@@ -152,6 +230,8 @@ class LazyStateDict:
152
230
  checkpoint_files = [
153
231
  os.path.join(checkpoint_folder, f) for f in checkpoint_files
154
232
  ]
233
+ else:
234
+ index = None
155
235
  return index, index_filename, checkpoint_files
156
236
 
157
237
  def _load_tensor_from_checkpoint_file(
@@ -210,6 +290,21 @@ class LazyStateDict:
210
290
  )
211
291
  return tensor
212
292
 
293
+ def __setitem__(self, key: str, value: torch.Tensor) -> None:
294
+ """
295
+ Set a tensor in the LazyStateDict. This will update the state dict cache if it is enabled.
296
+ """
297
+ assert key in list(
298
+ self.keys()
299
+ ), "KeyError: Cannot set a tensor for a key that does not exist in the LazyStateDict."
300
+ if self._state_dict_cache is not None:
301
+ self._state_dict_cache[key] = value
302
+ else:
303
+ log.warning(
304
+ "State dict cache is disabled, setting a tensor will not update the cache."
305
+ )
306
+ self._state_dict_cache = {key: value}
307
+
213
308
  def __contains__(self, key: str) -> bool:
214
309
  if self._state_dict_cache is not None and key in self._state_dict_cache:
215
310
  return True
@@ -248,21 +343,68 @@ class LazyStateDict:
248
343
  def __iter__(self) -> Iterator[str]:
249
344
  if self._index is not None:
250
345
  return iter(self._index)
251
- return iter(self._checkpoint_files)
346
+ elif self._state_dict_cache is not None:
347
+ return iter(self._state_dict_cache)
348
+ else:
349
+ raise RuntimeError(
350
+ "Unexpected error: cannot determine the keys in the state dict."
351
+ )
252
352
 
253
- def keys(self) -> List[str]:
254
- return list(self)
353
+ def keys(self) -> Iterator[str]:
354
+ for key in self:
355
+ yield key
255
356
 
256
- def values(self) -> List[torch.Tensor]:
257
- return [self[key] for key in self]
357
+ def values(self) -> Iterator[torch.Tensor]:
358
+ for key in self:
359
+ yield self[key]
258
360
 
259
361
  def items(self) -> Iterator[Tuple[str, torch.Tensor]]:
260
- return ((key, self[key]) for key in self)
362
+ for key in self:
363
+ yield key, self[key]
261
364
 
262
365
  def __repr__(self) -> str:
263
366
  if self._index is not None:
264
- return f"{self.__class__.__name__}(index={self._index})"
367
+ return f"{self.__class__.__name__}(keys={list(self.keys())})"
265
368
  else:
266
369
  return (
267
370
  f"{self.__class__.__name__}(checkpoint_files={self._checkpoint_files})"
268
371
  )
372
+
373
+ def get_parameter(self, target: str) -> torch.Tensor:
374
+ return self[target]
375
+
376
+ def get_submodule(self, target: str) -> nn.Module:
377
+ if self.meta_module is not None:
378
+ module: nn.Module = deepcopy(self.meta_module.get_submodule(target))
379
+ module.to_empty(device=self._device)
380
+ state_dict = {}
381
+ for name, _ in module.named_parameters():
382
+ state_dict[name] = self[f"{target}.{name}"]
383
+ module.load_state_dict(state_dict)
384
+ return module
385
+ else:
386
+ raise RuntimeError(
387
+ "Cannot get submodule because meta_module is not provided."
388
+ )
389
+
390
+ def load_state_dict(
391
+ self, state_dict: Dict[str, torch.Tensor], strict: bool = True
392
+ ) -> None:
393
+ """
394
+ Load a state dict into this LazyStateDict.
395
+ This method is only for compatibility with nn.Module and it overrides the cache of LazyStateDict.
396
+
397
+ Args:
398
+ state_dict (Dict[str, torch.Tensor]): The state dict to load.
399
+ strict (bool): Whether to enforce that all keys in the state dict are present in this LazyStateDict.
400
+ """
401
+ log.warning(
402
+ "Loading state dict into LazyStateDict is not recommended, as it may lead to unexpected behavior. "
403
+ "Use with caution."
404
+ )
405
+ if strict:
406
+ for key in state_dict:
407
+ if key not in self:
408
+ raise KeyError(f"Key {key} not found in LazyStateDict.")
409
+ for key, value in state_dict.items():
410
+ self[key] = value
@@ -1,6 +1,6 @@
1
1
  from typing import Iterable, List
2
2
 
3
- __all__ = ["first", "has_length", "join_list"]
3
+ __all__ = ["first", "has_length", "join_list", "attr_equal"]
4
4
 
5
5
 
6
6
  def first(iterable: Iterable):
@@ -23,3 +23,21 @@ def join_list(list_of_list: List[List]):
23
23
  for item in list_of_list:
24
24
  ans.extend(item)
25
25
  return ans
26
+
27
+
28
+ def attr_equal(obj, attr: str, value):
29
+ """
30
+ Check if the attribute of the object is equal to the given value.
31
+ Returns False if the attribute does not exist or is not equal to the value.
32
+
33
+ Args:
34
+ obj: The object to check.
35
+ attr (str): The attribute name to check.
36
+ value: The value to compare against.
37
+
38
+ Returns:
39
+ bool: True if the attribute exists and is equal to the value, False otherwise.
40
+ """
41
+ if not hasattr(obj, attr):
42
+ return False
43
+ return getattr(obj, attr) == value
@@ -62,6 +62,8 @@ class RankZeroLogger(logging.Logger):
62
62
  def _log(self, *args, **kwargs):
63
63
  if "stacklevel" in kwargs:
64
64
  kwargs["stacklevel"] += 1
65
+ else:
66
+ kwargs["stacklevel"] = 2
65
67
  return super()._log(*args, **kwargs)
66
68
 
67
69
  def is_global_zero(self):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fusion_bench
3
- Version: 0.2.17
3
+ Version: 0.2.19
4
4
  Summary: A Comprehensive Benchmark of Deep Model Fusion
5
5
  Author-email: Anke Tang <tang.anke@foxmail.com>
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
- fusion_bench/__init__.py,sha256=68dF-zPvb8E2MgYnmgIJsxIHJBy1MApKeOrRZvQEVlg,421
1
+ fusion_bench/__init__.py,sha256=vu3nMzKuiiHkbH13m8SOzj8qYU-n1PreBipWs_xjZig,1937
2
2
  fusion_bench/__main__.py,sha256=weUjxpP3ULnDgUxCehdbmoCM9cqfkhDhGB85tAF5qoE,81
3
3
  fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  fusion_bench/compat/method/__init__.py,sha256=A9pbskEYB4_ryD6sVrR0qI4eVYsbI7sExbhPeypP3fQ,5757
@@ -12,6 +12,7 @@ fusion_bench/compat/taskpool/base_pool.py,sha256=1AIZBxqUJgshq0Xo3Yo9es4b-8X8ksN
12
12
  fusion_bench/compat/taskpool/clip_image_classification.py,sha256=ZYZsbsE-fPzm6yafA0p-6wcDwVGryLmtXXtuEXeQbTY,7425
13
13
  fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py,sha256=JsdAE72V1C1eDcA1WCa0PIcSDTrGPclNKFDQ9G-hYts,5786
14
14
  fusion_bench/constants/__init__.py,sha256=Pyc4dLbl6oNduOCdnpeXQ9LDyVoIrkdl9eZ_l2axIv8,41
15
+ fusion_bench/constants/banner.py,sha256=fuIO36ETKlS6a3wbwZn-rA2OswSCfOYyyhZ0Fnal1s4,1656
15
16
  fusion_bench/constants/clip_vision.py,sha256=k0NRfiacxRaswdxUj91-e3jcP1u-RmvsaaYdqohcQVU,310
16
17
  fusion_bench/constants/paths.py,sha256=DVZyQ9FLhkyUdw6ARpXUCAMf_B8hFyJ6UNI-oYly3pE,591
17
18
  fusion_bench/dataset/__init__.py,sha256=OJiYmcqz0Vm5O7mE4PB5QFJeL_KjrsseQTRsQATGTm4,1050
@@ -42,12 +43,12 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
42
43
  fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
43
44
  fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
44
45
  fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
- fusion_bench/method/__init__.py,sha256=xry6_2sAWT_qeNFgcLTE7lBWWWjGhuljrJFeWL1NBXg,7552
46
+ fusion_bench/method/__init__.py,sha256=TMELBu1IdKN86Id1rlNlr-vqsdArti_6mlKLfobHoL4,7888
46
47
  fusion_bench/method/base_algorithm.py,sha256=UuITuGnSskcKEwUVINuPoWJUwqGm9AIgyQIOCu8BMks,1162
47
48
  fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
48
49
  fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
49
50
  fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
50
- fusion_bench/method/simple_average.py,sha256=vVzlfdf0mPHeY3VeOLrcWI4sWoLBW0gaX0lusjePVyQ,4539
51
+ fusion_bench/method/simple_average.py,sha256=A_VYtHhECcxY0_Mppe5ThOY-ip6XUvvtPHsaQKSmDPc,4971
51
52
  fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
52
53
  fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
53
54
  fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
@@ -88,6 +89,12 @@ fusion_bench/method/doge_ta/__init__.py,sha256=dixO0i5fmhgC_W2_DAQ4PzYnkMCZX5D8t
88
89
  fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
89
90
  fusion_bench/method/doge_ta/doge_ta.py,sha256=ec0qIq3F72nhbCVlfqdk1PYFM7QIlfMofeVFVvmDKiE,13785
90
91
  fusion_bench/method/doge_ta/layer_wise_adamerging.py,sha256=rLk3Nep5d6wMUNCp6q7pC7L0pfBvUwGBIuiGM7CQOf4,9780
92
+ fusion_bench/method/expert_sparsity/__init__.py,sha256=nt7k5cKqA2Bax1aM93ODwsEuibZ_hdFgQsUos_8h2v8,271
93
+ fusion_bench/method/expert_sparsity/mixtral/__init__.py,sha256=FyKDZIyYUnqvGIdJ5BS639UpzSBj11g28ATHs1Yczdk,545
94
+ fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py,sha256=e4fsXKSjCdmK-sThX6REk_d1hf-UolRLssQr7b6jD-M,5597
95
+ fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py,sha256=GJVIose_Duk4C6Re4LtaxSzGjR8XLGGlhLhsGMECwjw,4960
96
+ fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py,sha256=-0qWYkvHqKouJynn-kT907JQtiMLChtppOTL4SUYR9M,5090
97
+ fusion_bench/method/expert_sparsity/utils/calibration_data.py,sha256=jEWW60qXrnAyiAPz8gbpvQ4hFeL1P1ykoIzoydAaDAk,5459
91
98
  fusion_bench/method/fisher_merging/__init__.py,sha256=KWsjrtxKkPYwcUA5rB_6UNIqvesqk2NJw5AY_1ztLVE,225
92
99
  fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWETPZsCChqLRAhvfJp4QKD9TGSpTyV0,7635
93
100
  fusion_bench/method/fisher_merging/fisher_merging.py,sha256=OiceW0bqvnzGjIyIjd0A55ckXImDfEvi-Nk6td0sFFw,20892
@@ -108,11 +115,13 @@ fusion_bench/method/gossip/utils.py,sha256=ggMPRdxs--U2sV670oimX7jo8NGBX5Oq8Mlpr
108
115
  fusion_bench/method/isotropic_merging/__init__.py,sha256=yyx1Exfrf_4CtTjml1CIplFeeEDsSUk2Zc0AJ98ST9M,584
109
116
  fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4Rd0WcEPsYvQhBSCg,3773
110
117
  fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
118
+ fusion_bench/method/knots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
119
+ fusion_bench/method/knots/knots_utils.py,sha256=NWlzo4nhQypUcNknU832MP3QT42VsLx-6WQ9QXuSigw,795
111
120
  fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
112
121
  fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
113
122
  fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
114
123
  fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
115
- fusion_bench/method/linear/simple_average_for_llama.py,sha256=7JlVrmTMmrePvNGnZNoxSuCSq2Vu7cPQzjGC3WWUXBE,2079
124
+ fusion_bench/method/linear/simple_average_for_llama.py,sha256=OcjvfG5nuUzdo3P4Xi1mO6ApRu51YAUYXG5lAMeD6rg,2711
116
125
  fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
117
126
  fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
118
127
  fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=ys_td1IeL3bzPTE0Cixlj2JooCaB7qseRwSDwroAk5A,18777
@@ -199,7 +208,8 @@ fusion_bench/method/task_singular_vector/TSVM.py,sha256=Sdgoi8xT0Hl19pmGdIuUS3D1
199
208
  fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
200
209
  fusion_bench/method/task_singular_vector/utils/TSVC_utils.py,sha256=FytKbal48EW6iGIA-2zV7QSVbYTVflXr4Mr56q0W75k,2286
201
210
  fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=WGM8wCICdGsNVpceHamQytZi-q4wzrCmGGQCYOm67mI,29146
202
- fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Pgthb9Ld1x0Qis1wKWottwgzlBcyuzByFZCMIoI6Fys,240
211
+ fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Mep62TnXJscBEFZ6QDsI28cWmfygt8EPwjQdfUJzEZQ,315
212
+ fusion_bench/method/task_singular_vector/utils/task_singular_interference.py,sha256=tXsFwx8eomzu00nSp95CjjWZX82zq32ff2Q6VM_29CM,1348
203
213
  fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
204
214
  fusion_bench/method/ties_merging/ties_merging.py,sha256=GAlomW4oTePXd57TvogQXoliNnEto1_QVXVGVrU1QNc,5807
205
215
  fusion_bench/method/ties_merging/ties_merging_utils.py,sha256=EZyltS9hUM8NmcvXjAqhBpj-ucMlMtR95082kPDsJPU,10296
@@ -243,7 +253,7 @@ fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RP
243
253
  fusion_bench/modelpool/lazy_state_dict_pool.py,sha256=HtEA85rqSCHfsIddI5sKDcZf5kSuHNwrb8fF1TUSTr0,652
244
254
  fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
245
255
  fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
246
- fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=fO8lF8YWwoe43sVVOqHW9Ike7x-924-I6QQgZqx9EgA,6505
256
+ fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=7-mUWVGVsXyljH_06CmIyReClKx_xVjy5zeXTJcLQIk,8085
247
257
  fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
248
258
  fusion_bench/modelpool/clip_vision/modelpool.py,sha256=ADgzslXwYd95x42V26XvgS09WEKGfhH_AYuQmWKdT0w,5887
249
259
  fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
@@ -253,7 +263,7 @@ fusion_bench/modelpool/seq2seq_lm/modelpool.py,sha256=IjLHi8qycWOA4Ul9jnqR48evgV
253
263
  fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=k-t4RetcDlbkRkPHNuyeV3pQEcJnFRjd9Wp5tFBb-G8,128
254
264
  fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
255
265
  fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIKAmGJwfrNSuWtxzJ_-ME0gQksEYY2y-jVt7P82Qs0,3434
256
- fusion_bench/models/__init__.py,sha256=TNOEH_2yAQP51m9mdWepNEf9VGUZgDthtgXbs4rhb4M,100
266
+ fusion_bench/models/__init__.py,sha256=w2QbRl-nIHMHNCl9X46f2CD6oqZfEDAxGRs4G9cw2nw,145
257
267
  fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
258
268
  fusion_bench/models/parameter_dict.py,sha256=RBAXZ-PFLxy3eHxQqWLEvjKIR1uTHBWdKP0XXMNGmQg,3635
259
269
  fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
@@ -264,6 +274,11 @@ fusion_bench/models/we_moe.py,sha256=0U-m3mhzb4vFLIzn2jd7j_SQOF9lot4ddzq0l_VPp9g
264
274
  fusion_bench/models/chat_templates/__init__.py,sha256=v9vKrCfBgZ3UsMBQatZv1Z-ayPualBl5ciV0aO3p3iY,85
265
275
  fusion_bench/models/chat_templates/llama_3_Instruct.py,sha256=E6grNPECr0r1KDPIGW_DmpKQw5-Dh5WbMiTaHWDXwXo,4008
266
276
  fusion_bench/models/chat_templates/load_tokenizer.py,sha256=yRs3dB2tZo0Oh-YLJcMZzWSQ5Ps8KXrggZNb5F-aBuM,1400
277
+ fusion_bench/models/expert_sparsity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
278
+ fusion_bench/models/expert_sparsity/mixtral/__init__.py,sha256=3L_dcXW3op6ichd3GTlrTEZF_UA57Pyr13SlQRer7lg,439
279
+ fusion_bench/models/expert_sparsity/mixtral/dataset.py,sha256=1-OxRbK-TRaQBJuOfnuzQKSV_55mMRV6iqKWBuX5BIM,1350
280
+ fusion_bench/models/expert_sparsity/mixtral/modeling_mixtral.py,sha256=uGbn69toZ3ldHZKfwXNBijjcPQXeDdXpwJv3HnVwUbc,8252
281
+ fusion_bench/models/expert_sparsity/mixtral/wrapper.py,sha256=1zACEwXDNbi9uwI96oD84YrCsh6b8yh25ZjP3q37muo,10167
267
282
  fusion_bench/models/linearized/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
268
283
  fusion_bench/models/linearized/linearized_model_utils.py,sha256=5yKXReQHIwDttzT_oXwY_iIpaG1zIU0Nv93BWmmOqrg,3212
269
284
  fusion_bench/models/linearized/vision_model.py,sha256=HhbhtyoLD1qVvh1Sgl_beYF2W7AvMevmUy4Jx2XlcsY,4636
@@ -326,7 +341,7 @@ fusion_bench/optim/lr_scheduler/utils/__init__.py,sha256=GfZk9VYL3cFE1Qy2xQpGc1G
326
341
  fusion_bench/optim/lr_scheduler/utils/visualization.py,sha256=Ea1n9ElNizAe0iUnjynyfteuZunv2-UBMN_NfEU2imA,3490
327
342
  fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31AsmvwvNvJw,508
328
343
  fusion_bench/programs/base_program.py,sha256=0dX_KcMWASo53pr-ldzfUBWIjEXy6oeDWZBrfc7FIk8,195
329
- fusion_bench/programs/fabric_fusion_program.py,sha256=r-CuvS_OxADXjQgqNm2E_poSvIx1GCMjcyRCMWrwU1w,13427
344
+ fusion_bench/programs/fabric_fusion_program.py,sha256=978t9Fw9kvw-Il7rJLR2jNI1OfSxkhq1c5-5D4BgnYU,13813
330
345
  fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
331
346
  fusion_bench/scripts/cli.py,sha256=hw32XtmixFxYXwgAY7iRBMzma_XQjdf_FxPiXKL6dIc,1154
332
347
  fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
@@ -370,7 +385,7 @@ fusion_bench/tasks/clip_classification/fer2013.py,sha256=_oc2fdV308ywcb16rLZxBAd
370
385
  fusion_bench/tasks/clip_classification/flower102.py,sha256=p_JMs6HCCPZBKe7PTXt0WABsd-KcgmpBkxDSlJJaVVY,2096
371
386
  fusion_bench/tasks/clip_classification/food101.py,sha256=Oepvws5byGxrHswXt3ILG3UEPiZaFXYqK1yJqm1uYVE,1968
372
387
  fusion_bench/tasks/clip_classification/gtsrb.py,sha256=Dsaz-XNz6oA9nNTF2C2iXmmhhVz-gsw-WcGuFTqjzl4,2677
373
- fusion_bench/tasks/clip_classification/imagenet.py,sha256=Az7gnFuecVCDkP3mMjiLwOgrYAf_cxz177kkdivComU,38815
388
+ fusion_bench/tasks/clip_classification/imagenet.py,sha256=EhZ2iYAc8oApr5BU_vgM3cDY879anTkvb-5hfi-B7m4,48826
374
389
  fusion_bench/tasks/clip_classification/kmnist.py,sha256=Ohce6aVaXkPnviDaZYXANMhhBNHZXO3FnXYxYG-ISVg,311
375
390
  fusion_bench/tasks/clip_classification/mnist.py,sha256=-gQpHz_kCXmUOtAsM8FBUYFjlwcbAgnqpuVtRfCJ3JM,129
376
391
  fusion_bench/tasks/clip_classification/mongo_leaf_disease.py,sha256=L_2IgnzbZdGZrX27VNGu1rC-N3Aj4fetIXB9HM1QZkI,519
@@ -389,26 +404,26 @@ fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py,sha256=-B1wqVGp3wZ
389
404
  fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py,sha256=sVihXHbqwi8IlDpiIxzvmDv-Ob7WKvi23GIRYbBUKOc,1833
390
405
  fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py,sha256=GhRmGmcJGF4oVgZQarsBtx8GNKrNEZUkrillNz3iBuY,13183
391
406
  fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py,sha256=mKMTXIr5o-BqS_Hvv1bbMvvjQLLeKNVw7BKS9qgQ8Dw,1890
392
- fusion_bench/utils/__init__.py,sha256=E_K0a1V761KJCn623tL23QpqcnngIcLYo_6WK8Y0Xtc,447
407
+ fusion_bench/utils/__init__.py,sha256=XbmQGNmzVKnPLodevlM15iEIXCFx3hled7Vni4fzPYc,504
393
408
  fusion_bench/utils/auto.py,sha256=uACQLE62_kNyhl4BGduvcbyeTE61qXpIJx3Ccl8kh68,920
394
409
  fusion_bench/utils/cache_utils.py,sha256=rU8x4-RFUtaCZWKd4Kft_7xgPTr1bpXnqUDMkrIdpj8,1653
395
410
  fusion_bench/utils/data.py,sha256=L3aS2OwlpiXoILdPlo-j03gJh4s2LpAJw6fw9uY5G7c,6571
396
411
  fusion_bench/utils/devices.py,sha256=MIAxbEGinN-QU4W1g3-YKkJsteHQrwhbLqkmbzX1W3U,8035
397
412
  fusion_bench/utils/dict.py,sha256=ZCK0CRRT_B1Z18WY_GOYcmth7k5x9Jn1k7XhAVWRu98,1379
398
- fusion_bench/utils/dtype.py,sha256=kYoEGqsXitnwOU3W7ivqhQ0OjdI7MGu1VsyMJS4cSyQ,4299
413
+ fusion_bench/utils/dtype.py,sha256=qtsDFfm5XTuxsjvVg-orpWvbhebCvyivzzZbLg-xiaA,4327
399
414
  fusion_bench/utils/expr.py,sha256=zwHNrtIbOMnIChU-0ZI5qLbDva8zvHbizL-4F2TwM14,2386
400
- fusion_bench/utils/fabric.py,sha256=X2B_QPT2kqDPceQo3tp4XYAKbBpIs07w94Je_h2_81w,355
415
+ fusion_bench/utils/fabric.py,sha256=dF0Aj8NmVir30io6WcL5gpWmbQSPlEADvw_yFxFx1sQ,613
401
416
  fusion_bench/utils/functools.py,sha256=7_tYJ2WD88_2DDuOOj5aZz3cYuslYH5tsVyIgCeLtmk,1318
402
417
  fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqkoSGk,1174
403
418
  fusion_bench/utils/instantiate_utils.py,sha256=57D8YP25OO-ArltOSsHDKtnNcA44m1yAq-1wKZc2YVI,17523
404
419
  fusion_bench/utils/json.py,sha256=sVCqbm9mmyHybiui-O57KFt_ULrjLtN2wipSo6VDvqE,2533
405
420
  fusion_bench/utils/lazy_imports.py,sha256=v5l9cpHXPMaz1IVBmB5oOqefYr9vA3XvP340xT7Wy18,2796
406
- fusion_bench/utils/lazy_state_dict.py,sha256=0KBd3j6A_T_9-m8t68tSDpQZB_MWk9-cwho3O_8PkXY,10150
407
- fusion_bench/utils/misc.py,sha256=Rgec7eKcGIcp9BaFVdm2pzx0J-L8AyX5qWuiYNTGvTc,530
421
+ fusion_bench/utils/lazy_state_dict.py,sha256=Hu8PkhbJcUikXJxWUJ7vabu2uDbnUUF6UsRS0k8i71U,16841
422
+ fusion_bench/utils/misc.py,sha256=Qc3_H8UMooOp81Ow89zqvM1sNPIybq1cbq7s4-4lsfU,1082
408
423
  fusion_bench/utils/packages.py,sha256=L64paDi1SmeT3gRvRV6LaqB8AeGdzIYWIRI31qSQbSk,2110
409
424
  fusion_bench/utils/parameters.py,sha256=2vs8vo2o-nRA9NOMOYFye-X8-aHQZoYe54tM6n0r0RE,11757
410
425
  fusion_bench/utils/path.py,sha256=hRA1CPHNnTYBUmzbftH77sHvn4aTuybEK5Tth1skP-k,531
411
- fusion_bench/utils/pylogger.py,sha256=05gF2DNtdQG_Ldw029ufj4_IprBpciMVOznwpgaJUpI,3282
426
+ fusion_bench/utils/pylogger.py,sha256=amlRsdqHpOjxmBl6f9TA8y0LaWelEWgQNcGgEGsVOIc,3333
412
427
  fusion_bench/utils/rich_utils.py,sha256=B8DhAYuVp23pG6ZnnYrUhcL-ikHZoQeTNqlM7u4pwwU,5786
413
428
  fusion_bench/utils/set.py,sha256=_43ZvGKJ_BK9sUslsSNhi7xEfuAQuyj3vViImnGpnCY,134
414
429
  fusion_bench/utils/state_dict_arithmetic.py,sha256=iz5YYhMJpg2-lBLBY8E1onV4i_GkRhJOGn2DjhLBbYE,11390
@@ -422,7 +437,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
422
437
  fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
423
438
  fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
424
439
  fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
425
- fusion_bench-0.2.17.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
440
+ fusion_bench-0.2.19.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
426
441
  fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
427
442
  fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
428
443
  fusion_bench_config/fabric_model_fusion.yaml,sha256=YwJx_aUXm4ca4_mVItKVUOesMvmBBRGudQIOqgc1EP8,974
@@ -525,6 +540,7 @@ fusion_bench_config/fabric/llama_ddp.yaml,sha256=bOOuK5BPKmScE6yh5xY59qlawlMk2sR
525
540
  fusion_bench_config/fabric/llama_fsdp.yaml,sha256=pTvz0k79dSOVAAlvU0T1kNd8TNCwz2FGjDOujBtQ_Ks,574
526
541
  fusion_bench_config/fabric/llama_peft_fsdp.yaml,sha256=AosSmY4624iahKbTWY681BsZTC1ul78x9aHZ9zHS81s,579
527
542
  fusion_bench_config/fabric/loggers/csv_logger.yaml,sha256=Pv8I-xbxrpTb_fwtDiUtCAEoCZ8QYCLu2GeJNzb3Z3c,373
543
+ fusion_bench_config/fabric/loggers/mlflow_logger.yaml,sha256=iu_3Y57hRuc-FjJGoTDlcRqxq3K6U2vHBaBvhOPp8hk,71
528
544
  fusion_bench_config/fabric/loggers/tensorboard_logger.yaml,sha256=w9ZP1i8lRYQFslzEM98PmbcFhhn5dXReSJhLOdEi-do,381
529
545
  fusion_bench_config/fabric/loggers/wandb_logger.yaml,sha256=eF4slc6QPRuMCMJVeFHNJirsGiB15WQIxNgioXNwezc,142
530
546
  fusion_bench_config/fabric/strategy/deepspeed.yaml,sha256=zcSUeHVaATy92oTTRx3_hWQkCB3BPR7YOIt_U1gimCU,343
@@ -567,6 +583,8 @@ fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=CtZI3YPMJNDy225yhOJbSiMKl
567
583
  fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=khdpCvKMNytx4nZSgtUJFXv44MVytXu0aqUVd9TixXo,57
568
584
  fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=Ih9dqifpnvxW2QfJqp8Q8S8W1k7VZG9ulyPxkcuaWsw,54
569
585
  fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=2KD3PjFglqL7fjqhjXtOWxZ1mvmYodiNVroXsFd7EGE,261
586
+ fusion_bench_config/method/expert_sparsity/README.md,sha256=CLE0-XblXDWCUTHPaTNtBH-YquXn-uawwTJiYrgjMaA,239
587
+ fusion_bench_config/method/expert_sparsity/mixtral.yaml,sha256=maFL3LM0zfnQ1eXoNXUslSjgZmpOdUJgl_a31dYUBbc,605
570
588
  fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml,sha256=rl7kfVvdo2pG-DnglQUbjzkyBqnq1FpfoSDSjFtdLwk,633
571
589
  fusion_bench_config/method/fisher_merging/fisher_merging.yaml,sha256=B1wrv9mhaOID4KcAUEMZNxlvY3tR3Q3UGualFslvx-Y,475
572
590
  fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml,sha256=AE7XZqRDj4__J_ipEcjPs7qTB2J3xLQyFRlq1W4iHFE,563
@@ -778,6 +796,7 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_
778
796
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml,sha256=yC2U_IoBAhawgSahY_mdi7ea5kJ2SSRPJ2FM-bA-E9M,510
779
797
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml,sha256=a2nviqKSRNoQScYVbj5buq0PbUzmYJwNWdPBUoLaeV8,386
780
798
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=G6yvZuWOKb75RLn6tu2LPnwHUyvoxPfL_wqb_B11aZo,549
799
+ fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml,sha256=HZXjqbZKpSZCHb-G8qjj03PcvXg_8mrAuewDHZp0oEw,263
781
800
  fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml,sha256=8gr8ZtgegSHV0GHtJBiEgdYbRe8UHhO4_y8dayxZChk,506
782
801
  fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=oDsZkuAoh1mWUC7jZNzw8794zgX2bV5Z0esXpvbTs-c,643
783
802
  fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk3_bQiciMRlNsO5vp6AKHQM_-g-8bmU8251w,641
@@ -858,8 +877,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=3q-KMuFaM
858
877
  fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
859
878
  fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
860
879
  fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
861
- fusion_bench-0.2.17.dist-info/METADATA,sha256=cBTM1-Dfm6gdMfQ6vqrxpg7o5abvCwn3b1zb4KUSgHY,21966
862
- fusion_bench-0.2.17.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
863
- fusion_bench-0.2.17.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
864
- fusion_bench-0.2.17.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
865
- fusion_bench-0.2.17.dist-info/RECORD,,
880
+ fusion_bench-0.2.19.dist-info/METADATA,sha256=5pl4dtlAYklMMiMLBeKNaHqCQRd7sLSct7aIh9JIoGY,21966
881
+ fusion_bench-0.2.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
882
+ fusion_bench-0.2.19.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
883
+ fusion_bench-0.2.19.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
884
+ fusion_bench-0.2.19.dist-info/RECORD,,
@@ -0,0 +1,2 @@
1
+ # https://mlflow.org/
2
+ _target_: lightning.pytorch.loggers.MLFlowLogger
@@ -0,0 +1,6 @@
1
+ Original repo: https://github.com/Lucky-Lance/Expert_Sparsity
2
+
3
+ Reference:
4
+ Not All Experts are Equal: Efficient Expert Pruning and Skipping for Mixture-of-Experts Large Language Models.
5
+ ACL 2024.
6
+ http://arxiv.org/abs/2402.14800
@@ -0,0 +1,17 @@
1
+ _target_: fusion_bench.method.LayerWisePruningForMixtral
2
+ num_preserved_experts: 4
3
+ # Calibration dataset to use: `c4` or `math`.
4
+ # These names correspond to the keys of `fusion_bench.method.expert_sparsity.utils.calibration_data.DATASETS`
5
+ calib_set: c4
6
+ # Maximum sequence length of each sample in the calibration set
7
+ max_block_size: 2048
8
+ # Number of sequences in the calibration set. If set to 0 or a negative value, the whole dataset is used
9
+ n_blocks_for_stat: 128
10
+ # Batch size for model inference
11
+ batch_size: 8
12
+ # Number of workers in dataloader
13
+ num_workers: 8
14
+ # Random seed
15
+ seed: 42
16
+ # Path to save the pruned model
17
+ model_save_path: "{log_dir}/pruned_model"
@@ -0,0 +1,11 @@
1
+ _target_: fusion_bench.modelpool.CausalLMPool
2
+ _recursive_: false
3
+
4
+ load_lazy: false
5
+ models:
6
+ _pretrained_: Qwen/Qwen2.5-1.5B
7
+ expert_1: Qwen/Qwen2.5-Math-1.5B
8
+ expert_2: Qwen/Qwen2.5-Coder-1.5B
9
+ model_kwargs:
10
+ torch_dtype: bfloat16
11
+ tokenizer: Qwen/Qwen2.5-1.5B