fusion-bench 0.2.17__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +6 -0
- fusion_bench/constants/banner.py +12 -0
- fusion_bench/method/__init__.py +11 -0
- fusion_bench/method/expert_sparsity/__init__.py +10 -0
- fusion_bench/method/expert_sparsity/mixtral/__init__.py +23 -0
- fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py +175 -0
- fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py +159 -0
- fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py +173 -0
- fusion_bench/method/expert_sparsity/utils/calibration_data.py +153 -0
- fusion_bench/method/knots/__init__.py +0 -0
- fusion_bench/method/knots/knots_utils.py +23 -0
- fusion_bench/method/linear/simple_average_for_llama.py +17 -3
- fusion_bench/method/simple_average.py +10 -0
- fusion_bench/method/task_singular_vector/utils/__init__.py +1 -0
- fusion_bench/method/task_singular_vector/utils/task_singular_interference.py +41 -0
- fusion_bench/modelpool/causal_lm/causal_lm.py +45 -11
- fusion_bench/models/__init__.py +1 -0
- fusion_bench/models/expert_sparsity/__init__.py +0 -0
- fusion_bench/models/expert_sparsity/mixtral/__init__.py +15 -0
- fusion_bench/models/expert_sparsity/mixtral/dataset.py +40 -0
- fusion_bench/models/expert_sparsity/mixtral/modeling_mixtral.py +207 -0
- fusion_bench/models/expert_sparsity/mixtral/wrapper.py +268 -0
- fusion_bench/programs/fabric_fusion_program.py +12 -8
- fusion_bench/tasks/clip_classification/imagenet.py +1008 -2004
- fusion_bench/utils/__init__.py +3 -2
- fusion_bench/utils/dtype.py +2 -1
- fusion_bench/utils/fabric.py +11 -4
- fusion_bench/utils/lazy_state_dict.py +155 -13
- fusion_bench/utils/misc.py +19 -1
- fusion_bench/utils/pylogger.py +2 -0
- {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/METADATA +1 -1
- {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/RECORD +40 -21
- fusion_bench_config/fabric/loggers/mlflow_logger.yaml +2 -0
- fusion_bench_config/method/expert_sparsity/README.md +6 -0
- fusion_bench_config/method/expert_sparsity/mixtral.yaml +17 -0
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +11 -0
- {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.17.dist-info → fusion_bench-0.2.19.dist-info}/top_level.txt +0 -0
fusion_bench/utils/__init__.py
CHANGED
|
@@ -2,14 +2,15 @@
|
|
|
2
2
|
import importlib
|
|
3
3
|
from typing import Iterable
|
|
4
4
|
|
|
5
|
-
from . import data, functools, path
|
|
5
|
+
from . import data, functools, path, pylogger
|
|
6
6
|
from .cache_utils import *
|
|
7
7
|
from .devices import *
|
|
8
8
|
from .dtype import parse_dtype
|
|
9
9
|
from .fabric import seed_everything_by_time
|
|
10
10
|
from .instantiate_utils import instantiate, is_instantiable
|
|
11
|
+
from .json import load_from_json, save_to_json
|
|
12
|
+
from .lazy_state_dict import LazyStateDict
|
|
11
13
|
from .misc import *
|
|
12
14
|
from .packages import import_object
|
|
13
15
|
from .parameters import *
|
|
14
16
|
from .timer import timeit_context
|
|
15
|
-
from .lazy_state_dict import LazyStateDict
|
fusion_bench/utils/dtype.py
CHANGED
|
@@ -13,6 +13,7 @@ from transformers.utils import (
|
|
|
13
13
|
PRECISION_STR_TO_DTYPE: Dict[str, torch.dtype] = {
|
|
14
14
|
"fp16": torch.float16,
|
|
15
15
|
"float16": torch.float16,
|
|
16
|
+
"half": torch.float16,
|
|
16
17
|
"bf16": torch.bfloat16,
|
|
17
18
|
"bfloat16": torch.bfloat16,
|
|
18
19
|
"float": torch.float32,
|
|
@@ -50,7 +51,7 @@ def parse_dtype(dtype: Optional[str]):
|
|
|
50
51
|
|
|
51
52
|
dtype = dtype.strip('"')
|
|
52
53
|
if dtype not in PRECISION_STR_TO_DTYPE:
|
|
53
|
-
raise ValueError(f"Unsupported dtype: {
|
|
54
|
+
raise ValueError(f"Unsupported dtype string: {dtype}")
|
|
54
55
|
|
|
55
56
|
dtype = PRECISION_STR_TO_DTYPE[dtype]
|
|
56
57
|
return dtype
|
fusion_bench/utils/fabric.py
CHANGED
|
@@ -1,17 +1,24 @@
|
|
|
1
1
|
import time
|
|
2
|
+
from typing import Optional
|
|
2
3
|
|
|
3
4
|
import lightning as L
|
|
4
5
|
|
|
6
|
+
from fusion_bench.utils.pylogger import getRankZeroLogger
|
|
5
7
|
|
|
6
|
-
|
|
8
|
+
log = getRankZeroLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def seed_everything_by_time(fabric: Optional[L.Fabric] = None):
|
|
7
12
|
"""
|
|
8
13
|
Set seed for all processes by time.
|
|
9
14
|
"""
|
|
10
15
|
# set seed for all processes
|
|
11
|
-
if fabric.is_global_zero:
|
|
16
|
+
if fabric is None or fabric.is_global_zero:
|
|
12
17
|
seed = int(time.time())
|
|
13
18
|
else:
|
|
14
19
|
seed = None
|
|
15
|
-
fabric
|
|
16
|
-
|
|
20
|
+
if fabric is not None:
|
|
21
|
+
log.debug(f"Broadcasting seed `{seed}` to all processes")
|
|
22
|
+
fabric.barrier()
|
|
23
|
+
seed = fabric.broadcast(seed, src=0)
|
|
17
24
|
L.seed_everything(seed)
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
-
from
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Tuple, Type
|
|
5
6
|
|
|
6
7
|
import torch
|
|
8
|
+
from accelerate import init_empty_weights
|
|
7
9
|
from accelerate.utils.constants import SAFE_WEIGHTS_NAME, WEIGHTS_NAME
|
|
8
10
|
from huggingface_hub import snapshot_download
|
|
9
11
|
from safetensors import safe_open
|
|
10
12
|
from safetensors.torch import load_file
|
|
13
|
+
from torch import nn
|
|
11
14
|
from transformers import AutoConfig
|
|
12
15
|
|
|
13
16
|
from fusion_bench.utils.dtype import parse_dtype
|
|
17
|
+
from fusion_bench.utils.packages import import_object
|
|
14
18
|
|
|
15
19
|
if TYPE_CHECKING:
|
|
16
20
|
from transformers import PretrainedConfig
|
|
@@ -51,14 +55,19 @@ class LazyStateDict:
|
|
|
51
55
|
"""
|
|
52
56
|
|
|
53
57
|
_local_path: str
|
|
58
|
+
"""local path to the checkpoint."""
|
|
54
59
|
_state_dict_cache: Optional[Dict]
|
|
60
|
+
"""Cache for the state dict, if enabled."""
|
|
55
61
|
_index_filename: Optional[str]
|
|
56
62
|
_checkpoint_files: Optional[List[str]]
|
|
57
|
-
_index: Optional[Dict]
|
|
63
|
+
_index: Optional[Dict[str, str]]
|
|
64
|
+
"""Mapping of parameter names to checkpoint files."""
|
|
58
65
|
|
|
59
66
|
def __init__(
|
|
60
67
|
self,
|
|
61
68
|
checkpoint: str,
|
|
69
|
+
meta_module_class: Optional[Type[nn.Module]] = None,
|
|
70
|
+
meta_module: Optional[nn.Module] = None,
|
|
62
71
|
cache_state_dict: bool = False,
|
|
63
72
|
torch_dtype: Optional[torch.dtype] = None,
|
|
64
73
|
device: str = "cpu",
|
|
@@ -66,6 +75,37 @@ class LazyStateDict:
|
|
|
66
75
|
hf_cache_dir: Optional[str] = None,
|
|
67
76
|
hf_proxies: Optional[Dict] = None,
|
|
68
77
|
):
|
|
78
|
+
"""
|
|
79
|
+
Args:
|
|
80
|
+
checkpoint (str): Path to the checkpoint file or directory.
|
|
81
|
+
meta_module_class (Type[nn.Module], optional): Class of the meta module to instantiate.
|
|
82
|
+
meta_module (nn.Module, optional): Pre-initialized meta module.
|
|
83
|
+
cache_state_dict (bool): Whether to cache the state dict in memory.
|
|
84
|
+
torch_dtype (torch.dtype, optional): The dtype to use for the tensors.
|
|
85
|
+
device (str): The device to load the tensors onto.
|
|
86
|
+
hf_revision (str, optional): The revision of the model to download from Hugging Face Hub.
|
|
87
|
+
hf_cache_dir (str, optional): The cache directory for Hugging Face models.
|
|
88
|
+
hf_proxies (Dict, optional): Proxies to use for downloading from Hugging Face Hub.
|
|
89
|
+
"""
|
|
90
|
+
self.cache_state_dict = cache_state_dict
|
|
91
|
+
self.meta_module_class = meta_module_class
|
|
92
|
+
if isinstance(self.meta_module_class, str):
|
|
93
|
+
self.meta_module_class = import_object(self.meta_module_class)
|
|
94
|
+
self.meta_module = meta_module
|
|
95
|
+
if self.meta_module_class is not None:
|
|
96
|
+
if self.meta_module is not None:
|
|
97
|
+
raise ValueError(
|
|
98
|
+
"Cannot provide both meta_module_class and meta_module, please provide only one."
|
|
99
|
+
)
|
|
100
|
+
with init_empty_weights():
|
|
101
|
+
self.meta_module = self.meta_module_class.from_pretrained(
|
|
102
|
+
checkpoint,
|
|
103
|
+
torch_dtype=torch_dtype,
|
|
104
|
+
revision=hf_revision,
|
|
105
|
+
cache_dir=hf_cache_dir,
|
|
106
|
+
proxies=hf_proxies,
|
|
107
|
+
)
|
|
108
|
+
|
|
69
109
|
self._checkpoint = checkpoint
|
|
70
110
|
self._local_path = resolve_checkpoint_path(
|
|
71
111
|
checkpoint,
|
|
@@ -78,10 +118,44 @@ class LazyStateDict:
|
|
|
78
118
|
self._resolve_checkpoint_files(self._local_path)
|
|
79
119
|
)
|
|
80
120
|
|
|
81
|
-
if
|
|
82
|
-
|
|
121
|
+
if self._index is not None:
|
|
122
|
+
# if meta_module is provided, remove the keys that are not in the meta_module
|
|
123
|
+
if self.meta_module is not None:
|
|
124
|
+
meta_module_state_dict = self.meta_module.state_dict()
|
|
125
|
+
for key in tuple(self._index.keys()):
|
|
126
|
+
if key not in meta_module_state_dict:
|
|
127
|
+
self._index.pop(key)
|
|
128
|
+
if cache_state_dict:
|
|
129
|
+
self._state_dict_cache = {}
|
|
130
|
+
else:
|
|
131
|
+
self._state_dict_cache = None
|
|
132
|
+
elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
|
|
133
|
+
SAFE_WEIGHTS_NAME
|
|
134
|
+
):
|
|
135
|
+
# let the keys of self._index be the keys of the state dict, the values are the checkpoint file
|
|
136
|
+
with safe_open(
|
|
137
|
+
self._checkpoint_files[0], framework="pt", device=device
|
|
138
|
+
) as f:
|
|
139
|
+
self._index = {key: self._checkpoint_files[0] for key in f.keys()}
|
|
140
|
+
if cache_state_dict:
|
|
141
|
+
self._state_dict_cache = {}
|
|
142
|
+
else:
|
|
143
|
+
self._state_dict_cache = None
|
|
144
|
+
elif len(self._checkpoint_files) == 1 and self._checkpoint_files[0].endswith(
|
|
145
|
+
WEIGHTS_NAME
|
|
146
|
+
):
|
|
147
|
+
log.info(f"Loading full state dict from {WEIGHTS_NAME}")
|
|
148
|
+
self._state_dict_cache = torch.load(self._checkpoint_files[0])
|
|
149
|
+
# if meta_module is provided, remove the keys that are not in the meta_module
|
|
150
|
+
if self.meta_module is not None:
|
|
151
|
+
meta_module_state_dict = self.meta_module.state_dict()
|
|
152
|
+
for key in tuple(self._state_dict_cache.keys()):
|
|
153
|
+
if key not in meta_module_state_dict:
|
|
154
|
+
self._state_dict_cache.pop(key)
|
|
83
155
|
else:
|
|
84
|
-
|
|
156
|
+
raise ValueError(
|
|
157
|
+
f"Cannot determine the type of checkpoint, please provide a checkpoint path to a file containing a whole state dict with file name {WEIGHTS_NAME} or {SAFE_WEIGHTS_NAME}, or the index of a sharded checkpoint ending with `.index.json`."
|
|
158
|
+
)
|
|
85
159
|
|
|
86
160
|
self._torch_dtype = parse_dtype(torch_dtype)
|
|
87
161
|
self._device = device
|
|
@@ -94,7 +168,11 @@ class LazyStateDict:
|
|
|
94
168
|
def config(self) -> "PretrainedConfig":
|
|
95
169
|
return AutoConfig.from_pretrained(self._checkpoint)
|
|
96
170
|
|
|
97
|
-
def state_dict(self) -> "LazyStateDict":
|
|
171
|
+
def state_dict(self, keep_vars: bool = False) -> "LazyStateDict":
|
|
172
|
+
"""
|
|
173
|
+
Args:
|
|
174
|
+
keep_vars (bool): Ignored, as LazyStateDict does not support keep_vars. Just for compatibility.
|
|
175
|
+
"""
|
|
98
176
|
return self
|
|
99
177
|
|
|
100
178
|
def _resolve_checkpoint_files(self, checkpoint: str):
|
|
@@ -152,6 +230,8 @@ class LazyStateDict:
|
|
|
152
230
|
checkpoint_files = [
|
|
153
231
|
os.path.join(checkpoint_folder, f) for f in checkpoint_files
|
|
154
232
|
]
|
|
233
|
+
else:
|
|
234
|
+
index = None
|
|
155
235
|
return index, index_filename, checkpoint_files
|
|
156
236
|
|
|
157
237
|
def _load_tensor_from_checkpoint_file(
|
|
@@ -210,6 +290,21 @@ class LazyStateDict:
|
|
|
210
290
|
)
|
|
211
291
|
return tensor
|
|
212
292
|
|
|
293
|
+
def __setitem__(self, key: str, value: torch.Tensor) -> None:
|
|
294
|
+
"""
|
|
295
|
+
Set a tensor in the LazyStateDict. This will update the state dict cache if it is enabled.
|
|
296
|
+
"""
|
|
297
|
+
assert key in list(
|
|
298
|
+
self.keys()
|
|
299
|
+
), "KeyError: Cannot set a tensor for a key that does not exist in the LazyStateDict."
|
|
300
|
+
if self._state_dict_cache is not None:
|
|
301
|
+
self._state_dict_cache[key] = value
|
|
302
|
+
else:
|
|
303
|
+
log.warning(
|
|
304
|
+
"State dict cache is disabled, setting a tensor will not update the cache."
|
|
305
|
+
)
|
|
306
|
+
self._state_dict_cache = {key: value}
|
|
307
|
+
|
|
213
308
|
def __contains__(self, key: str) -> bool:
|
|
214
309
|
if self._state_dict_cache is not None and key in self._state_dict_cache:
|
|
215
310
|
return True
|
|
@@ -248,21 +343,68 @@ class LazyStateDict:
|
|
|
248
343
|
def __iter__(self) -> Iterator[str]:
|
|
249
344
|
if self._index is not None:
|
|
250
345
|
return iter(self._index)
|
|
251
|
-
|
|
346
|
+
elif self._state_dict_cache is not None:
|
|
347
|
+
return iter(self._state_dict_cache)
|
|
348
|
+
else:
|
|
349
|
+
raise RuntimeError(
|
|
350
|
+
"Unexpected error: cannot determine the keys in the state dict."
|
|
351
|
+
)
|
|
252
352
|
|
|
253
|
-
def keys(self) ->
|
|
254
|
-
|
|
353
|
+
def keys(self) -> Iterator[str]:
|
|
354
|
+
for key in self:
|
|
355
|
+
yield key
|
|
255
356
|
|
|
256
|
-
def values(self) ->
|
|
257
|
-
|
|
357
|
+
def values(self) -> Iterator[torch.Tensor]:
|
|
358
|
+
for key in self:
|
|
359
|
+
yield self[key]
|
|
258
360
|
|
|
259
361
|
def items(self) -> Iterator[Tuple[str, torch.Tensor]]:
|
|
260
|
-
|
|
362
|
+
for key in self:
|
|
363
|
+
yield key, self[key]
|
|
261
364
|
|
|
262
365
|
def __repr__(self) -> str:
|
|
263
366
|
if self._index is not None:
|
|
264
|
-
return f"{self.__class__.__name__}(
|
|
367
|
+
return f"{self.__class__.__name__}(keys={list(self.keys())})"
|
|
265
368
|
else:
|
|
266
369
|
return (
|
|
267
370
|
f"{self.__class__.__name__}(checkpoint_files={self._checkpoint_files})"
|
|
268
371
|
)
|
|
372
|
+
|
|
373
|
+
def get_parameter(self, target: str) -> torch.Tensor:
|
|
374
|
+
return self[target]
|
|
375
|
+
|
|
376
|
+
def get_submodule(self, target: str) -> nn.Module:
|
|
377
|
+
if self.meta_module is not None:
|
|
378
|
+
module: nn.Module = deepcopy(self.meta_module.get_submodule(target))
|
|
379
|
+
module.to_empty(device=self._device)
|
|
380
|
+
state_dict = {}
|
|
381
|
+
for name, _ in module.named_parameters():
|
|
382
|
+
state_dict[name] = self[f"{target}.{name}"]
|
|
383
|
+
module.load_state_dict(state_dict)
|
|
384
|
+
return module
|
|
385
|
+
else:
|
|
386
|
+
raise RuntimeError(
|
|
387
|
+
"Cannot get submodule because meta_module is not provided."
|
|
388
|
+
)
|
|
389
|
+
|
|
390
|
+
def load_state_dict(
|
|
391
|
+
self, state_dict: Dict[str, torch.Tensor], strict: bool = True
|
|
392
|
+
) -> None:
|
|
393
|
+
"""
|
|
394
|
+
Load a state dict into this LazyStateDict.
|
|
395
|
+
This method is only for compatibility with nn.Module and it overrides the cache of LazyStateDict.
|
|
396
|
+
|
|
397
|
+
Args:
|
|
398
|
+
state_dict (Dict[str, torch.Tensor]): The state dict to load.
|
|
399
|
+
strict (bool): Whether to enforce that all keys in the state dict are present in this LazyStateDict.
|
|
400
|
+
"""
|
|
401
|
+
log.warning(
|
|
402
|
+
"Loading state dict into LazyStateDict is not recommended, as it may lead to unexpected behavior. "
|
|
403
|
+
"Use with caution."
|
|
404
|
+
)
|
|
405
|
+
if strict:
|
|
406
|
+
for key in state_dict:
|
|
407
|
+
if key not in self:
|
|
408
|
+
raise KeyError(f"Key {key} not found in LazyStateDict.")
|
|
409
|
+
for key, value in state_dict.items():
|
|
410
|
+
self[key] = value
|
fusion_bench/utils/misc.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import Iterable, List
|
|
2
2
|
|
|
3
|
-
__all__ = ["first", "has_length", "join_list"]
|
|
3
|
+
__all__ = ["first", "has_length", "join_list", "attr_equal"]
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def first(iterable: Iterable):
|
|
@@ -23,3 +23,21 @@ def join_list(list_of_list: List[List]):
|
|
|
23
23
|
for item in list_of_list:
|
|
24
24
|
ans.extend(item)
|
|
25
25
|
return ans
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def attr_equal(obj, attr: str, value):
|
|
29
|
+
"""
|
|
30
|
+
Check if the attribute of the object is equal to the given value.
|
|
31
|
+
Returns False if the attribute does not exist or is not equal to the value.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
obj: The object to check.
|
|
35
|
+
attr (str): The attribute name to check.
|
|
36
|
+
value: The value to compare against.
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
bool: True if the attribute exists and is equal to the value, False otherwise.
|
|
40
|
+
"""
|
|
41
|
+
if not hasattr(obj, attr):
|
|
42
|
+
return False
|
|
43
|
+
return getattr(obj, attr) == value
|
fusion_bench/utils/pylogger.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
fusion_bench/__init__.py,sha256=
|
|
1
|
+
fusion_bench/__init__.py,sha256=vu3nMzKuiiHkbH13m8SOzj8qYU-n1PreBipWs_xjZig,1937
|
|
2
2
|
fusion_bench/__main__.py,sha256=weUjxpP3ULnDgUxCehdbmoCM9cqfkhDhGB85tAF5qoE,81
|
|
3
3
|
fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
fusion_bench/compat/method/__init__.py,sha256=A9pbskEYB4_ryD6sVrR0qI4eVYsbI7sExbhPeypP3fQ,5757
|
|
@@ -12,6 +12,7 @@ fusion_bench/compat/taskpool/base_pool.py,sha256=1AIZBxqUJgshq0Xo3Yo9es4b-8X8ksN
|
|
|
12
12
|
fusion_bench/compat/taskpool/clip_image_classification.py,sha256=ZYZsbsE-fPzm6yafA0p-6wcDwVGryLmtXXtuEXeQbTY,7425
|
|
13
13
|
fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py,sha256=JsdAE72V1C1eDcA1WCa0PIcSDTrGPclNKFDQ9G-hYts,5786
|
|
14
14
|
fusion_bench/constants/__init__.py,sha256=Pyc4dLbl6oNduOCdnpeXQ9LDyVoIrkdl9eZ_l2axIv8,41
|
|
15
|
+
fusion_bench/constants/banner.py,sha256=fuIO36ETKlS6a3wbwZn-rA2OswSCfOYyyhZ0Fnal1s4,1656
|
|
15
16
|
fusion_bench/constants/clip_vision.py,sha256=k0NRfiacxRaswdxUj91-e3jcP1u-RmvsaaYdqohcQVU,310
|
|
16
17
|
fusion_bench/constants/paths.py,sha256=DVZyQ9FLhkyUdw6ARpXUCAMf_B8hFyJ6UNI-oYly3pE,591
|
|
17
18
|
fusion_bench/dataset/__init__.py,sha256=OJiYmcqz0Vm5O7mE4PB5QFJeL_KjrsseQTRsQATGTm4,1050
|
|
@@ -42,12 +43,12 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
|
|
|
42
43
|
fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
|
|
43
44
|
fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
|
|
44
45
|
fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
|
-
fusion_bench/method/__init__.py,sha256=
|
|
46
|
+
fusion_bench/method/__init__.py,sha256=TMELBu1IdKN86Id1rlNlr-vqsdArti_6mlKLfobHoL4,7888
|
|
46
47
|
fusion_bench/method/base_algorithm.py,sha256=UuITuGnSskcKEwUVINuPoWJUwqGm9AIgyQIOCu8BMks,1162
|
|
47
48
|
fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
|
|
48
49
|
fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
|
|
49
50
|
fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
|
|
50
|
-
fusion_bench/method/simple_average.py,sha256=
|
|
51
|
+
fusion_bench/method/simple_average.py,sha256=A_VYtHhECcxY0_Mppe5ThOY-ip6XUvvtPHsaQKSmDPc,4971
|
|
51
52
|
fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
|
|
52
53
|
fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
|
|
53
54
|
fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
|
|
@@ -88,6 +89,12 @@ fusion_bench/method/doge_ta/__init__.py,sha256=dixO0i5fmhgC_W2_DAQ4PzYnkMCZX5D8t
|
|
|
88
89
|
fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
|
|
89
90
|
fusion_bench/method/doge_ta/doge_ta.py,sha256=ec0qIq3F72nhbCVlfqdk1PYFM7QIlfMofeVFVvmDKiE,13785
|
|
90
91
|
fusion_bench/method/doge_ta/layer_wise_adamerging.py,sha256=rLk3Nep5d6wMUNCp6q7pC7L0pfBvUwGBIuiGM7CQOf4,9780
|
|
92
|
+
fusion_bench/method/expert_sparsity/__init__.py,sha256=nt7k5cKqA2Bax1aM93ODwsEuibZ_hdFgQsUos_8h2v8,271
|
|
93
|
+
fusion_bench/method/expert_sparsity/mixtral/__init__.py,sha256=FyKDZIyYUnqvGIdJ5BS639UpzSBj11g28ATHs1Yczdk,545
|
|
94
|
+
fusion_bench/method/expert_sparsity/mixtral/dynamic_skipping.py,sha256=e4fsXKSjCdmK-sThX6REk_d1hf-UolRLssQr7b6jD-M,5597
|
|
95
|
+
fusion_bench/method/expert_sparsity/mixtral/layer_wise_pruning.py,sha256=GJVIose_Duk4C6Re4LtaxSzGjR8XLGGlhLhsGMECwjw,4960
|
|
96
|
+
fusion_bench/method/expert_sparsity/mixtral/progressive_pruning.py,sha256=-0qWYkvHqKouJynn-kT907JQtiMLChtppOTL4SUYR9M,5090
|
|
97
|
+
fusion_bench/method/expert_sparsity/utils/calibration_data.py,sha256=jEWW60qXrnAyiAPz8gbpvQ4hFeL1P1ykoIzoydAaDAk,5459
|
|
91
98
|
fusion_bench/method/fisher_merging/__init__.py,sha256=KWsjrtxKkPYwcUA5rB_6UNIqvesqk2NJw5AY_1ztLVE,225
|
|
92
99
|
fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWETPZsCChqLRAhvfJp4QKD9TGSpTyV0,7635
|
|
93
100
|
fusion_bench/method/fisher_merging/fisher_merging.py,sha256=OiceW0bqvnzGjIyIjd0A55ckXImDfEvi-Nk6td0sFFw,20892
|
|
@@ -108,11 +115,13 @@ fusion_bench/method/gossip/utils.py,sha256=ggMPRdxs--U2sV670oimX7jo8NGBX5Oq8Mlpr
|
|
|
108
115
|
fusion_bench/method/isotropic_merging/__init__.py,sha256=yyx1Exfrf_4CtTjml1CIplFeeEDsSUk2Zc0AJ98ST9M,584
|
|
109
116
|
fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4Rd0WcEPsYvQhBSCg,3773
|
|
110
117
|
fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
|
|
118
|
+
fusion_bench/method/knots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
119
|
+
fusion_bench/method/knots/knots_utils.py,sha256=NWlzo4nhQypUcNknU832MP3QT42VsLx-6WQ9QXuSigw,795
|
|
111
120
|
fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
|
|
112
121
|
fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
|
|
113
122
|
fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
|
|
114
123
|
fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
|
|
115
|
-
fusion_bench/method/linear/simple_average_for_llama.py,sha256=
|
|
124
|
+
fusion_bench/method/linear/simple_average_for_llama.py,sha256=OcjvfG5nuUzdo3P4Xi1mO6ApRu51YAUYXG5lAMeD6rg,2711
|
|
116
125
|
fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
|
|
117
126
|
fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
|
|
118
127
|
fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=ys_td1IeL3bzPTE0Cixlj2JooCaB7qseRwSDwroAk5A,18777
|
|
@@ -199,7 +208,8 @@ fusion_bench/method/task_singular_vector/TSVM.py,sha256=Sdgoi8xT0Hl19pmGdIuUS3D1
|
|
|
199
208
|
fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
|
|
200
209
|
fusion_bench/method/task_singular_vector/utils/TSVC_utils.py,sha256=FytKbal48EW6iGIA-2zV7QSVbYTVflXr4Mr56q0W75k,2286
|
|
201
210
|
fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=WGM8wCICdGsNVpceHamQytZi-q4wzrCmGGQCYOm67mI,29146
|
|
202
|
-
fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=
|
|
211
|
+
fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Mep62TnXJscBEFZ6QDsI28cWmfygt8EPwjQdfUJzEZQ,315
|
|
212
|
+
fusion_bench/method/task_singular_vector/utils/task_singular_interference.py,sha256=tXsFwx8eomzu00nSp95CjjWZX82zq32ff2Q6VM_29CM,1348
|
|
203
213
|
fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
|
|
204
214
|
fusion_bench/method/ties_merging/ties_merging.py,sha256=GAlomW4oTePXd57TvogQXoliNnEto1_QVXVGVrU1QNc,5807
|
|
205
215
|
fusion_bench/method/ties_merging/ties_merging_utils.py,sha256=EZyltS9hUM8NmcvXjAqhBpj-ucMlMtR95082kPDsJPU,10296
|
|
@@ -243,7 +253,7 @@ fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RP
|
|
|
243
253
|
fusion_bench/modelpool/lazy_state_dict_pool.py,sha256=HtEA85rqSCHfsIddI5sKDcZf5kSuHNwrb8fF1TUSTr0,652
|
|
244
254
|
fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
|
|
245
255
|
fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
|
|
246
|
-
fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=
|
|
256
|
+
fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=7-mUWVGVsXyljH_06CmIyReClKx_xVjy5zeXTJcLQIk,8085
|
|
247
257
|
fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
|
|
248
258
|
fusion_bench/modelpool/clip_vision/modelpool.py,sha256=ADgzslXwYd95x42V26XvgS09WEKGfhH_AYuQmWKdT0w,5887
|
|
249
259
|
fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
|
|
@@ -253,7 +263,7 @@ fusion_bench/modelpool/seq2seq_lm/modelpool.py,sha256=IjLHi8qycWOA4Ul9jnqR48evgV
|
|
|
253
263
|
fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=k-t4RetcDlbkRkPHNuyeV3pQEcJnFRjd9Wp5tFBb-G8,128
|
|
254
264
|
fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
|
|
255
265
|
fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIKAmGJwfrNSuWtxzJ_-ME0gQksEYY2y-jVt7P82Qs0,3434
|
|
256
|
-
fusion_bench/models/__init__.py,sha256=
|
|
266
|
+
fusion_bench/models/__init__.py,sha256=w2QbRl-nIHMHNCl9X46f2CD6oqZfEDAxGRs4G9cw2nw,145
|
|
257
267
|
fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
|
|
258
268
|
fusion_bench/models/parameter_dict.py,sha256=RBAXZ-PFLxy3eHxQqWLEvjKIR1uTHBWdKP0XXMNGmQg,3635
|
|
259
269
|
fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
|
|
@@ -264,6 +274,11 @@ fusion_bench/models/we_moe.py,sha256=0U-m3mhzb4vFLIzn2jd7j_SQOF9lot4ddzq0l_VPp9g
|
|
|
264
274
|
fusion_bench/models/chat_templates/__init__.py,sha256=v9vKrCfBgZ3UsMBQatZv1Z-ayPualBl5ciV0aO3p3iY,85
|
|
265
275
|
fusion_bench/models/chat_templates/llama_3_Instruct.py,sha256=E6grNPECr0r1KDPIGW_DmpKQw5-Dh5WbMiTaHWDXwXo,4008
|
|
266
276
|
fusion_bench/models/chat_templates/load_tokenizer.py,sha256=yRs3dB2tZo0Oh-YLJcMZzWSQ5Ps8KXrggZNb5F-aBuM,1400
|
|
277
|
+
fusion_bench/models/expert_sparsity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
278
|
+
fusion_bench/models/expert_sparsity/mixtral/__init__.py,sha256=3L_dcXW3op6ichd3GTlrTEZF_UA57Pyr13SlQRer7lg,439
|
|
279
|
+
fusion_bench/models/expert_sparsity/mixtral/dataset.py,sha256=1-OxRbK-TRaQBJuOfnuzQKSV_55mMRV6iqKWBuX5BIM,1350
|
|
280
|
+
fusion_bench/models/expert_sparsity/mixtral/modeling_mixtral.py,sha256=uGbn69toZ3ldHZKfwXNBijjcPQXeDdXpwJv3HnVwUbc,8252
|
|
281
|
+
fusion_bench/models/expert_sparsity/mixtral/wrapper.py,sha256=1zACEwXDNbi9uwI96oD84YrCsh6b8yh25ZjP3q37muo,10167
|
|
267
282
|
fusion_bench/models/linearized/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
268
283
|
fusion_bench/models/linearized/linearized_model_utils.py,sha256=5yKXReQHIwDttzT_oXwY_iIpaG1zIU0Nv93BWmmOqrg,3212
|
|
269
284
|
fusion_bench/models/linearized/vision_model.py,sha256=HhbhtyoLD1qVvh1Sgl_beYF2W7AvMevmUy4Jx2XlcsY,4636
|
|
@@ -326,7 +341,7 @@ fusion_bench/optim/lr_scheduler/utils/__init__.py,sha256=GfZk9VYL3cFE1Qy2xQpGc1G
|
|
|
326
341
|
fusion_bench/optim/lr_scheduler/utils/visualization.py,sha256=Ea1n9ElNizAe0iUnjynyfteuZunv2-UBMN_NfEU2imA,3490
|
|
327
342
|
fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31AsmvwvNvJw,508
|
|
328
343
|
fusion_bench/programs/base_program.py,sha256=0dX_KcMWASo53pr-ldzfUBWIjEXy6oeDWZBrfc7FIk8,195
|
|
329
|
-
fusion_bench/programs/fabric_fusion_program.py,sha256=
|
|
344
|
+
fusion_bench/programs/fabric_fusion_program.py,sha256=978t9Fw9kvw-Il7rJLR2jNI1OfSxkhq1c5-5D4BgnYU,13813
|
|
330
345
|
fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
331
346
|
fusion_bench/scripts/cli.py,sha256=hw32XtmixFxYXwgAY7iRBMzma_XQjdf_FxPiXKL6dIc,1154
|
|
332
347
|
fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
|
|
@@ -370,7 +385,7 @@ fusion_bench/tasks/clip_classification/fer2013.py,sha256=_oc2fdV308ywcb16rLZxBAd
|
|
|
370
385
|
fusion_bench/tasks/clip_classification/flower102.py,sha256=p_JMs6HCCPZBKe7PTXt0WABsd-KcgmpBkxDSlJJaVVY,2096
|
|
371
386
|
fusion_bench/tasks/clip_classification/food101.py,sha256=Oepvws5byGxrHswXt3ILG3UEPiZaFXYqK1yJqm1uYVE,1968
|
|
372
387
|
fusion_bench/tasks/clip_classification/gtsrb.py,sha256=Dsaz-XNz6oA9nNTF2C2iXmmhhVz-gsw-WcGuFTqjzl4,2677
|
|
373
|
-
fusion_bench/tasks/clip_classification/imagenet.py,sha256=
|
|
388
|
+
fusion_bench/tasks/clip_classification/imagenet.py,sha256=EhZ2iYAc8oApr5BU_vgM3cDY879anTkvb-5hfi-B7m4,48826
|
|
374
389
|
fusion_bench/tasks/clip_classification/kmnist.py,sha256=Ohce6aVaXkPnviDaZYXANMhhBNHZXO3FnXYxYG-ISVg,311
|
|
375
390
|
fusion_bench/tasks/clip_classification/mnist.py,sha256=-gQpHz_kCXmUOtAsM8FBUYFjlwcbAgnqpuVtRfCJ3JM,129
|
|
376
391
|
fusion_bench/tasks/clip_classification/mongo_leaf_disease.py,sha256=L_2IgnzbZdGZrX27VNGu1rC-N3Aj4fetIXB9HM1QZkI,519
|
|
@@ -389,26 +404,26 @@ fusion_bench/tasks/flan_t5_text_generation/glue_evaluation.py,sha256=-B1wqVGp3wZ
|
|
|
389
404
|
fusion_bench/tasks/flan_t5_text_generation/glue_load_dataset.py,sha256=sVihXHbqwi8IlDpiIxzvmDv-Ob7WKvi23GIRYbBUKOc,1833
|
|
390
405
|
fusion_bench/tasks/flan_t5_text_generation/glue_preprocessors.py,sha256=GhRmGmcJGF4oVgZQarsBtx8GNKrNEZUkrillNz3iBuY,13183
|
|
391
406
|
fusion_bench/tasks/flan_t5_text_generation/glue_prompt_templates.py,sha256=mKMTXIr5o-BqS_Hvv1bbMvvjQLLeKNVw7BKS9qgQ8Dw,1890
|
|
392
|
-
fusion_bench/utils/__init__.py,sha256=
|
|
407
|
+
fusion_bench/utils/__init__.py,sha256=XbmQGNmzVKnPLodevlM15iEIXCFx3hled7Vni4fzPYc,504
|
|
393
408
|
fusion_bench/utils/auto.py,sha256=uACQLE62_kNyhl4BGduvcbyeTE61qXpIJx3Ccl8kh68,920
|
|
394
409
|
fusion_bench/utils/cache_utils.py,sha256=rU8x4-RFUtaCZWKd4Kft_7xgPTr1bpXnqUDMkrIdpj8,1653
|
|
395
410
|
fusion_bench/utils/data.py,sha256=L3aS2OwlpiXoILdPlo-j03gJh4s2LpAJw6fw9uY5G7c,6571
|
|
396
411
|
fusion_bench/utils/devices.py,sha256=MIAxbEGinN-QU4W1g3-YKkJsteHQrwhbLqkmbzX1W3U,8035
|
|
397
412
|
fusion_bench/utils/dict.py,sha256=ZCK0CRRT_B1Z18WY_GOYcmth7k5x9Jn1k7XhAVWRu98,1379
|
|
398
|
-
fusion_bench/utils/dtype.py,sha256=
|
|
413
|
+
fusion_bench/utils/dtype.py,sha256=qtsDFfm5XTuxsjvVg-orpWvbhebCvyivzzZbLg-xiaA,4327
|
|
399
414
|
fusion_bench/utils/expr.py,sha256=zwHNrtIbOMnIChU-0ZI5qLbDva8zvHbizL-4F2TwM14,2386
|
|
400
|
-
fusion_bench/utils/fabric.py,sha256=
|
|
415
|
+
fusion_bench/utils/fabric.py,sha256=dF0Aj8NmVir30io6WcL5gpWmbQSPlEADvw_yFxFx1sQ,613
|
|
401
416
|
fusion_bench/utils/functools.py,sha256=7_tYJ2WD88_2DDuOOj5aZz3cYuslYH5tsVyIgCeLtmk,1318
|
|
402
417
|
fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqkoSGk,1174
|
|
403
418
|
fusion_bench/utils/instantiate_utils.py,sha256=57D8YP25OO-ArltOSsHDKtnNcA44m1yAq-1wKZc2YVI,17523
|
|
404
419
|
fusion_bench/utils/json.py,sha256=sVCqbm9mmyHybiui-O57KFt_ULrjLtN2wipSo6VDvqE,2533
|
|
405
420
|
fusion_bench/utils/lazy_imports.py,sha256=v5l9cpHXPMaz1IVBmB5oOqefYr9vA3XvP340xT7Wy18,2796
|
|
406
|
-
fusion_bench/utils/lazy_state_dict.py,sha256=
|
|
407
|
-
fusion_bench/utils/misc.py,sha256=
|
|
421
|
+
fusion_bench/utils/lazy_state_dict.py,sha256=Hu8PkhbJcUikXJxWUJ7vabu2uDbnUUF6UsRS0k8i71U,16841
|
|
422
|
+
fusion_bench/utils/misc.py,sha256=Qc3_H8UMooOp81Ow89zqvM1sNPIybq1cbq7s4-4lsfU,1082
|
|
408
423
|
fusion_bench/utils/packages.py,sha256=L64paDi1SmeT3gRvRV6LaqB8AeGdzIYWIRI31qSQbSk,2110
|
|
409
424
|
fusion_bench/utils/parameters.py,sha256=2vs8vo2o-nRA9NOMOYFye-X8-aHQZoYe54tM6n0r0RE,11757
|
|
410
425
|
fusion_bench/utils/path.py,sha256=hRA1CPHNnTYBUmzbftH77sHvn4aTuybEK5Tth1skP-k,531
|
|
411
|
-
fusion_bench/utils/pylogger.py,sha256=
|
|
426
|
+
fusion_bench/utils/pylogger.py,sha256=amlRsdqHpOjxmBl6f9TA8y0LaWelEWgQNcGgEGsVOIc,3333
|
|
412
427
|
fusion_bench/utils/rich_utils.py,sha256=B8DhAYuVp23pG6ZnnYrUhcL-ikHZoQeTNqlM7u4pwwU,5786
|
|
413
428
|
fusion_bench/utils/set.py,sha256=_43ZvGKJ_BK9sUslsSNhi7xEfuAQuyj3vViImnGpnCY,134
|
|
414
429
|
fusion_bench/utils/state_dict_arithmetic.py,sha256=iz5YYhMJpg2-lBLBY8E1onV4i_GkRhJOGn2DjhLBbYE,11390
|
|
@@ -422,7 +437,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
|
|
|
422
437
|
fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
|
|
423
438
|
fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
|
|
424
439
|
fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
|
|
425
|
-
fusion_bench-0.2.
|
|
440
|
+
fusion_bench-0.2.19.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
|
|
426
441
|
fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
|
|
427
442
|
fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
|
|
428
443
|
fusion_bench_config/fabric_model_fusion.yaml,sha256=YwJx_aUXm4ca4_mVItKVUOesMvmBBRGudQIOqgc1EP8,974
|
|
@@ -525,6 +540,7 @@ fusion_bench_config/fabric/llama_ddp.yaml,sha256=bOOuK5BPKmScE6yh5xY59qlawlMk2sR
|
|
|
525
540
|
fusion_bench_config/fabric/llama_fsdp.yaml,sha256=pTvz0k79dSOVAAlvU0T1kNd8TNCwz2FGjDOujBtQ_Ks,574
|
|
526
541
|
fusion_bench_config/fabric/llama_peft_fsdp.yaml,sha256=AosSmY4624iahKbTWY681BsZTC1ul78x9aHZ9zHS81s,579
|
|
527
542
|
fusion_bench_config/fabric/loggers/csv_logger.yaml,sha256=Pv8I-xbxrpTb_fwtDiUtCAEoCZ8QYCLu2GeJNzb3Z3c,373
|
|
543
|
+
fusion_bench_config/fabric/loggers/mlflow_logger.yaml,sha256=iu_3Y57hRuc-FjJGoTDlcRqxq3K6U2vHBaBvhOPp8hk,71
|
|
528
544
|
fusion_bench_config/fabric/loggers/tensorboard_logger.yaml,sha256=w9ZP1i8lRYQFslzEM98PmbcFhhn5dXReSJhLOdEi-do,381
|
|
529
545
|
fusion_bench_config/fabric/loggers/wandb_logger.yaml,sha256=eF4slc6QPRuMCMJVeFHNJirsGiB15WQIxNgioXNwezc,142
|
|
530
546
|
fusion_bench_config/fabric/strategy/deepspeed.yaml,sha256=zcSUeHVaATy92oTTRx3_hWQkCB3BPR7YOIt_U1gimCU,343
|
|
@@ -567,6 +583,8 @@ fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=CtZI3YPMJNDy225yhOJbSiMKl
|
|
|
567
583
|
fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=khdpCvKMNytx4nZSgtUJFXv44MVytXu0aqUVd9TixXo,57
|
|
568
584
|
fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=Ih9dqifpnvxW2QfJqp8Q8S8W1k7VZG9ulyPxkcuaWsw,54
|
|
569
585
|
fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=2KD3PjFglqL7fjqhjXtOWxZ1mvmYodiNVroXsFd7EGE,261
|
|
586
|
+
fusion_bench_config/method/expert_sparsity/README.md,sha256=CLE0-XblXDWCUTHPaTNtBH-YquXn-uawwTJiYrgjMaA,239
|
|
587
|
+
fusion_bench_config/method/expert_sparsity/mixtral.yaml,sha256=maFL3LM0zfnQ1eXoNXUslSjgZmpOdUJgl_a31dYUBbc,605
|
|
570
588
|
fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml,sha256=rl7kfVvdo2pG-DnglQUbjzkyBqnq1FpfoSDSjFtdLwk,633
|
|
571
589
|
fusion_bench_config/method/fisher_merging/fisher_merging.yaml,sha256=B1wrv9mhaOID4KcAUEMZNxlvY3tR3Q3UGualFslvx-Y,475
|
|
572
590
|
fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml,sha256=AE7XZqRDj4__J_ipEcjPs7qTB2J3xLQyFRlq1W4iHFE,563
|
|
@@ -778,6 +796,7 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_
|
|
|
778
796
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml,sha256=yC2U_IoBAhawgSahY_mdi7ea5kJ2SSRPJ2FM-bA-E9M,510
|
|
779
797
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml,sha256=a2nviqKSRNoQScYVbj5buq0PbUzmYJwNWdPBUoLaeV8,386
|
|
780
798
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=G6yvZuWOKb75RLn6tu2LPnwHUyvoxPfL_wqb_B11aZo,549
|
|
799
|
+
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml,sha256=HZXjqbZKpSZCHb-G8qjj03PcvXg_8mrAuewDHZp0oEw,263
|
|
781
800
|
fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml,sha256=8gr8ZtgegSHV0GHtJBiEgdYbRe8UHhO4_y8dayxZChk,506
|
|
782
801
|
fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=oDsZkuAoh1mWUC7jZNzw8794zgX2bV5Z0esXpvbTs-c,643
|
|
783
802
|
fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk3_bQiciMRlNsO5vp6AKHQM_-g-8bmU8251w,641
|
|
@@ -858,8 +877,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=3q-KMuFaM
|
|
|
858
877
|
fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
|
|
859
878
|
fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
|
|
860
879
|
fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
|
|
861
|
-
fusion_bench-0.2.
|
|
862
|
-
fusion_bench-0.2.
|
|
863
|
-
fusion_bench-0.2.
|
|
864
|
-
fusion_bench-0.2.
|
|
865
|
-
fusion_bench-0.2.
|
|
880
|
+
fusion_bench-0.2.19.dist-info/METADATA,sha256=5pl4dtlAYklMMiMLBeKNaHqCQRd7sLSct7aIh9JIoGY,21966
|
|
881
|
+
fusion_bench-0.2.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
882
|
+
fusion_bench-0.2.19.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
|
|
883
|
+
fusion_bench-0.2.19.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
|
|
884
|
+
fusion_bench-0.2.19.dist-info/RECORD,,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
_target_: fusion_bench.method.LayerWisePruningForMixtral
|
|
2
|
+
num_preserved_experts: 4
|
|
3
|
+
# c4 or math
|
|
4
|
+
# corresponding to the keys of `fusion_bench.method.expert_sparsity.utils.calibration_data.DATASETS`
|
|
5
|
+
calib_set: c4
|
|
6
|
+
# Maximal sequence length of each sample in calibration set
|
|
7
|
+
max_block_size: 2048
|
|
8
|
+
# Number of sequences in calibration set. If set to 0 or negative, the whole dataset will be used
|
|
9
|
+
n_blocks_for_stat: 128
|
|
10
|
+
# Batch size for model inference
|
|
11
|
+
batch_size: 8
|
|
12
|
+
# Number of workers in dataloader
|
|
13
|
+
num_workers: 8
|
|
14
|
+
# Random seed
|
|
15
|
+
seed: 42
|
|
16
|
+
# Path to save the pruned model
|
|
17
|
+
model_save_path: "{log_dir}/pruned_model"
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
_target_: fusion_bench.modelpool.CausalLMPool
|
|
2
|
+
_recursive_: false
|
|
3
|
+
|
|
4
|
+
load_lazy: false
|
|
5
|
+
models:
|
|
6
|
+
_pretrained_: Qwen/Qwen2.5-1.5B
|
|
7
|
+
expert_1: Qwen/Qwen2.5-Math-1.5B
|
|
8
|
+
expert_2: Qwen/Qwen2.5-Coder-1.5B
|
|
9
|
+
model_kwargs:
|
|
10
|
+
torch_dtype: bfloat16
|
|
11
|
+
tokenizer: Qwen/Qwen2.5-1.5B
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|