fusion-bench 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/method/__init__.py +8 -0
- fusion_bench/method/ensemble.py +17 -2
- fusion_bench/method/linear/__init__.py +6 -2
- fusion_bench/method/linear/{simple_average_for_llama.py → simple_average_for_causallm.py} +8 -4
- fusion_bench/method/linear/{task_arithmetic_for_llama.py → task_arithmetic_for_causallm.py} +22 -12
- fusion_bench/method/linear/ties_merging_for_causallm.py +70 -0
- fusion_bench/method/simple_average.py +2 -2
- fusion_bench/method/task_arithmetic/task_arithmetic.py +35 -10
- fusion_bench/method/ties_merging/ties_merging.py +22 -6
- fusion_bench/method/wudi/__init__.py +1 -0
- fusion_bench/method/wudi/wudi.py +105 -0
- fusion_bench/mixins/lightning_fabric.py +4 -0
- fusion_bench/mixins/serialization.py +25 -78
- fusion_bench/modelpool/causal_lm/causal_lm.py +32 -10
- fusion_bench/models/hf_clip.py +4 -0
- fusion_bench/models/hf_utils.py +2 -1
- fusion_bench/models/model_card_templates/default.md +8 -1
- fusion_bench/models/wrappers/ensemble.py +136 -7
- fusion_bench/scripts/cli.py +2 -2
- fusion_bench/taskpool/clip_vision/taskpool.py +11 -4
- fusion_bench/utils/devices.py +30 -8
- fusion_bench/utils/lazy_state_dict.py +3 -0
- fusion_bench/utils/rich_utils.py +7 -3
- {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/METADATA +10 -3
- {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/RECORD +37 -30
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +1 -0
- fusion_bench_config/method/linear/{simple_average_for_llama.yaml → simple_average_for_causallm.yaml} +1 -1
- fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +4 -0
- fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +13 -0
- fusion_bench_config/method/wudi/wudi.yaml +4 -0
- fusion_bench_config/modelpool/CausalLMPool/{Qwen2.5-1.5B_math_and_coder.yaml → Qwen2.5-1.5B_math_and_code.yaml} +1 -2
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml +11 -0
- fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +0 -4
- {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/top_level.txt +0 -0
fusion_bench/utils/devices.py
CHANGED
|
@@ -39,7 +39,12 @@ def clear_cuda_cache():
|
|
|
39
39
|
log.warning("CUDA is not available. No cache to clear.")
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
def to_device(
|
|
42
|
+
def to_device(
|
|
43
|
+
obj: T,
|
|
44
|
+
device: Optional[torch.device],
|
|
45
|
+
copy_on_move: bool = False,
|
|
46
|
+
**kwargs: Any,
|
|
47
|
+
) -> T:
|
|
43
48
|
"""
|
|
44
49
|
Move a given object to the specified device.
|
|
45
50
|
|
|
@@ -49,12 +54,20 @@ def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
|
|
|
49
54
|
Args:
|
|
50
55
|
obj: The object to be moved to the device. This can be a torch.Tensor, torch.nn.Module, list, tuple, or dict.
|
|
51
56
|
device (torch.device): The target device to move the object to. This can be `None`.
|
|
52
|
-
|
|
57
|
+
copy_on_move (bool, optional): Whether to force a copy operation when moving tensors to a different device.
|
|
58
|
+
If True, tensors will be copied when moved to a different device (copy=True is passed to tensor.to()).
|
|
59
|
+
If False (default), tensors are moved without forcing a copy operation, allowing PyTorch to optimize
|
|
60
|
+
the operation. This parameter only affects torch.Tensor objects; modules and other types are unaffected.
|
|
61
|
+
Defaults to False.
|
|
62
|
+
**kwargs: Additional keyword arguments to be passed to the `to` method of torch.Tensor or torch.nn.Module.
|
|
63
|
+
For example, `non_blocking=True`, `dtype=torch.float16`. Note that if `copy_on_move=True`, the `copy`
|
|
64
|
+
keyword argument will be automatically set and should not be provided manually.
|
|
53
65
|
|
|
54
66
|
Returns:
|
|
55
67
|
The object moved to the specified device. The type of the returned object matches the type of the input object.
|
|
56
68
|
|
|
57
69
|
Examples:
|
|
70
|
+
```python
|
|
58
71
|
>>> tensor = torch.tensor([1, 2, 3])
|
|
59
72
|
>>> to_device(tensor, torch.device('cuda'))
|
|
60
73
|
tensor([1, 2, 3], device='cuda:0')
|
|
@@ -66,17 +79,26 @@ def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
|
|
|
66
79
|
>>> data = [torch.tensor([1, 2]), torch.tensor([3, 4])]
|
|
67
80
|
>>> to_device(data, torch.device('cuda'))
|
|
68
81
|
[tensor([1, 2], device='cuda:0'), tensor([3, 4], device='cuda:0')]
|
|
82
|
+
|
|
83
|
+
>>> # Force copy when moving to different device
|
|
84
|
+
>>> tensor = torch.tensor([1, 2, 3], device='cpu')
|
|
85
|
+
>>> copied_tensor = to_device(tensor, torch.device('cuda'), copy_on_move=True)
|
|
86
|
+
>>> # tensor and copied_tensor will have different memory locations
|
|
87
|
+
```
|
|
69
88
|
"""
|
|
70
|
-
if isinstance(obj,
|
|
89
|
+
if isinstance(obj, torch.Tensor):
|
|
90
|
+
if copy_on_move:
|
|
91
|
+
if obj.device != torch.device(device):
|
|
92
|
+
kwargs["copy"] = True
|
|
93
|
+
return obj.to(device, **kwargs)
|
|
94
|
+
elif isinstance(obj, torch.nn.Module):
|
|
71
95
|
return obj.to(device, **kwargs)
|
|
72
96
|
elif isinstance(obj, list):
|
|
73
|
-
return [to_device(o, device) for o in obj]
|
|
97
|
+
return [to_device(o, device, **kwargs) for o in obj]
|
|
74
98
|
elif isinstance(obj, tuple):
|
|
75
|
-
return tuple(to_device(o, device) for o in obj)
|
|
99
|
+
return tuple(to_device(o, device, **kwargs) for o in obj)
|
|
76
100
|
elif isinstance(obj, dict):
|
|
77
|
-
for key in obj
|
|
78
|
-
obj[key] = to_device(obj[key], device)
|
|
79
|
-
return obj
|
|
101
|
+
return {key: to_device(value, device, **kwargs) for key, value in obj.items()}
|
|
80
102
|
else:
|
|
81
103
|
# the default behavior is to return the object as is
|
|
82
104
|
return obj
|
|
@@ -76,6 +76,9 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
|
|
|
76
76
|
_index: Optional[Dict[str, str]]
|
|
77
77
|
"""Mapping of parameter names to checkpoint files."""
|
|
78
78
|
|
|
79
|
+
meta_module: TorchModelType = None
|
|
80
|
+
meta_module_class: Optional[Type[TorchModelType]] = None
|
|
81
|
+
|
|
79
82
|
def __init__(
|
|
80
83
|
self,
|
|
81
84
|
checkpoint: str,
|
fusion_bench/utils/rich_utils.py
CHANGED
|
@@ -188,17 +188,21 @@ if __name__ == "__main__":
|
|
|
188
188
|
display_available_styles()
|
|
189
189
|
|
|
190
190
|
|
|
191
|
-
def setup_colorlogging(
|
|
191
|
+
def setup_colorlogging(
|
|
192
|
+
force=False,
|
|
193
|
+
level=logging.INFO,
|
|
194
|
+
**kwargs,
|
|
195
|
+
):
|
|
192
196
|
"""
|
|
193
197
|
Sets up color logging for the application.
|
|
194
198
|
"""
|
|
195
199
|
FORMAT = "%(message)s"
|
|
196
200
|
|
|
197
201
|
logging.basicConfig(
|
|
198
|
-
level=
|
|
202
|
+
level=level,
|
|
199
203
|
format=FORMAT,
|
|
200
204
|
datefmt="[%X]",
|
|
201
205
|
handlers=[RichHandler()],
|
|
202
206
|
force=force,
|
|
203
|
-
**
|
|
207
|
+
**kwargs,
|
|
204
208
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fusion_bench
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.24
|
|
4
4
|
Summary: A Comprehensive Benchmark of Deep Model Fusion
|
|
5
5
|
Author-email: Anke Tang <tang.anke@foxmail.com>
|
|
6
6
|
Project-URL: Repository, https://github.com/tanganke/fusion_bench
|
|
@@ -23,12 +23,19 @@ Requires-Dist: rich
|
|
|
23
23
|
Requires-Dist: scipy
|
|
24
24
|
Requires-Dist: h5py
|
|
25
25
|
Requires-Dist: pytest
|
|
26
|
+
Requires-Dist: joblib
|
|
27
|
+
Requires-Dist: bidict
|
|
26
28
|
Requires-Dist: transformers!=4.49
|
|
27
29
|
Requires-Dist: pillow!=11.2.1
|
|
28
30
|
Provides-Extra: lm-eval-harness
|
|
29
31
|
Requires-Dist: lm-eval; extra == "lm-eval-harness"
|
|
30
32
|
Requires-Dist: immutabledict; extra == "lm-eval-harness"
|
|
31
33
|
Requires-Dist: langdetect; extra == "lm-eval-harness"
|
|
34
|
+
Requires-Dist: rich-run; extra == "lm-eval-harness"
|
|
35
|
+
Provides-Extra: docs
|
|
36
|
+
Requires-Dist: mkdocs; extra == "docs"
|
|
37
|
+
Requires-Dist: mkdocs-material; extra == "docs"
|
|
38
|
+
Requires-Dist: mkdocstrings[python]; extra == "docs"
|
|
32
39
|
Dynamic: license-file
|
|
33
40
|
|
|
34
41
|
<div align='center'>
|
|
@@ -151,7 +158,7 @@ This will install the latest version of fusion-bench and the dependencies requir
|
|
|
151
158
|
Documentation for using LM-Eval Harness within FusionBench framework can be found at [this online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
|
|
152
159
|
|
|
153
160
|
> [!TIP]
|
|
154
|
-
> Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/
|
|
161
|
+
> Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/llm) or in the [`docs/modelpool/llm/index.md`](docs/modelpool/llm/index.md) markdown file.
|
|
155
162
|
|
|
156
163
|
## Introduction to Deep Model Fusion
|
|
157
164
|
|
|
@@ -179,7 +186,7 @@ The project is structured as follows:
|
|
|
179
186
|
- `taskpool`: configuration files for the task pool.
|
|
180
187
|
- `model`: configuration files for the models.
|
|
181
188
|
- `dataset`: configuration files for the datasets.
|
|
182
|
-
- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -
|
|
189
|
+
- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -e ".[docs]"`.
|
|
183
190
|
- `examples/`: example scripts for running some of the experiments.
|
|
184
191
|
> **naming convention**: `examples/{method_name}/` contains the files such as bash scripts and jupyter notebooks for the specific method.
|
|
185
192
|
- `tests/`: unit tests for the benchmark.
|
|
@@ -48,12 +48,12 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
|
|
|
48
48
|
fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
|
|
49
49
|
fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
|
|
50
50
|
fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
fusion_bench/method/__init__.py,sha256
|
|
51
|
+
fusion_bench/method/__init__.py,sha256=-d5WMlvY3kHYSUeompoG71T6fSttXPDjPf6X4TxNkqY,8986
|
|
52
52
|
fusion_bench/method/base_algorithm.py,sha256=OnKSNPQ_nIdIWxryyblW_sko7uoEBN4lGh-eLkJ4kh4,9004
|
|
53
53
|
fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
|
|
54
|
-
fusion_bench/method/ensemble.py,sha256=
|
|
54
|
+
fusion_bench/method/ensemble.py,sha256=Bjzqxt-tUp5cawT1jIhqKswN5QH3bkYbmuI4LS4uTG0,3619
|
|
55
55
|
fusion_bench/method/model_recombination.py,sha256=b2ku5wCrWd1QSZscIra4KlhLDxt04JjU30ItMNvpZ6g,5268
|
|
56
|
-
fusion_bench/method/simple_average.py,sha256=
|
|
56
|
+
fusion_bench/method/simple_average.py,sha256=FuIwHCUNK5CoToBzVt-lo8SK7wjj8CdRpiNLRnAflH4,5519
|
|
57
57
|
fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
|
|
58
58
|
fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
|
|
59
59
|
fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
|
|
@@ -128,12 +128,13 @@ fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4R
|
|
|
128
128
|
fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
|
|
129
129
|
fusion_bench/method/knots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
130
130
|
fusion_bench/method/knots/knots_utils.py,sha256=NWlzo4nhQypUcNknU832MP3QT42VsLx-6WQ9QXuSigw,795
|
|
131
|
-
fusion_bench/method/linear/__init__.py,sha256=
|
|
131
|
+
fusion_bench/method/linear/__init__.py,sha256=0U7JqlX8JuMECKlvLNM16Lxc1lCBN2bVqH8FtNoD-Fw,417
|
|
132
132
|
fusion_bench/method/linear/expo.py,sha256=N7XnBTC0Nz_4gRs1f9TL9g-j-Lku5TF0lAjGKhZHwOw,3990
|
|
133
133
|
fusion_bench/method/linear/linear_interpolation.py,sha256=Y01HPMBb7TaCjEBsbC6gqQyHvY1SRpwPyPPLxvYrL0s,2223
|
|
134
134
|
fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
|
|
135
|
-
fusion_bench/method/linear/
|
|
136
|
-
fusion_bench/method/linear/
|
|
135
|
+
fusion_bench/method/linear/simple_average_for_causallm.py,sha256=qc-JiPLu19442DcP0xCl4EDGzVnIbq3WGiAiWkNzv6E,3448
|
|
136
|
+
fusion_bench/method/linear/task_arithmetic_for_causallm.py,sha256=7cewnrjX47omokAdhNvDIQV8zz06_ZNKPWM7CZx30R0,2247
|
|
137
|
+
fusion_bench/method/linear/ties_merging_for_causallm.py,sha256=yi0RCC6eRwXMKUC_cBdFLvejia4nmjPh9Pd0MpaUrVg,2392
|
|
137
138
|
fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
|
|
138
139
|
fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=1nvjOMABuEISyYaTRrFiwHLWvSTgHT8pEzTYBTLBRUg,18779
|
|
139
140
|
fusion_bench/method/lm_finetune/causal_lm_pretrain.py,sha256=4CL9KGFsUzrt-edMfTooo4G4apzTH_57rso3DGGvKL0,219
|
|
@@ -222,7 +223,7 @@ fusion_bench/method/tall_mask/__init__.py,sha256=XINPP8PqGQ01he9p2RyHaKGyrcYoJuY
|
|
|
222
223
|
fusion_bench/method/tall_mask/task_arithmetic.py,sha256=c-5ehKV_t46ljvKTBDr-eA3-FbSD_UNXlza4cOqK5aI,4371
|
|
223
224
|
fusion_bench/method/tall_mask/utils.py,sha256=Wlp8WcPwR_lCaBIZ9rgG6ewLfSzz3G7kPk9yj13pvls,8817
|
|
224
225
|
fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
|
|
225
|
-
fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=
|
|
226
|
+
fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=KsSBshf04MUwIjoc0HAAmY6cWMqjZwZOYXbUuU4EaL0,6320
|
|
226
227
|
fusion_bench/method/task_singular_vector/TSVC.py,sha256=yn4SrZNvtA6PoGYJmbmtNeDyDbGnRCgfZ7ZCg914AZU,410
|
|
227
228
|
fusion_bench/method/task_singular_vector/TSVM.py,sha256=Sdgoi8xT0Hl19pmGdIuUS3D1DsVqSVD-Hipp-Sj_HoA,13652
|
|
228
229
|
fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
|
|
@@ -231,7 +232,7 @@ fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=WGM8wCICdGsN
|
|
|
231
232
|
fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Mep62TnXJscBEFZ6QDsI28cWmfygt8EPwjQdfUJzEZQ,315
|
|
232
233
|
fusion_bench/method/task_singular_vector/utils/task_singular_interference.py,sha256=tXsFwx8eomzu00nSp95CjjWZX82zq32ff2Q6VM_29CM,1348
|
|
233
234
|
fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
|
|
234
|
-
fusion_bench/method/ties_merging/ties_merging.py,sha256=
|
|
235
|
+
fusion_bench/method/ties_merging/ties_merging.py,sha256=u2o7Wo2SJJsxxhBeAhsmY7k4bdZkUtwAwGePGI4Sggc,5916
|
|
235
236
|
fusion_bench/method/ties_merging/ties_merging_utils.py,sha256=EZyltS9hUM8NmcvXjAqhBpj-ucMlMtR95082kPDsJPU,10296
|
|
236
237
|
fusion_bench/method/trust_region/__init__.py,sha256=4ao0E-jTlmTQPArbFWD_dFn_4yve3urNIuSMT8JtRIM,91
|
|
237
238
|
fusion_bench/method/trust_region/clip_task_arithmetic.py,sha256=SWP7sRMiXzkDZ3KdNht3zqjaTcAtB4wpnnd8KYbcKZI,7441
|
|
@@ -245,6 +246,8 @@ fusion_bench/method/we_moe/we_moe.py,sha256=_QtmD04oFh7aLhmPq8EYchYB7BIN9ZFWOeys
|
|
|
245
246
|
fusion_bench/method/weighted_average/__init__.py,sha256=bLxIuuB72hH05J_Spz4MZbiLpYL39iwgVIQa_QeQpIk,118
|
|
246
247
|
fusion_bench/method/weighted_average/llama.py,sha256=vvxXp8v98kvXfHi7fYupnIrOVoA3tp08lmV2jDri_BY,3731
|
|
247
248
|
fusion_bench/method/weighted_average/weighted_average.py,sha256=E4byEA2VfXozu7S_gnYVvwI3qg8AFWaSeNRHGbs2Tno,3340
|
|
249
|
+
fusion_bench/method/wudi/__init__.py,sha256=08qPzOlhjw-Ab8TwyY9MGOGx_TLrUTueJc1WgRIvuxU,44
|
|
250
|
+
fusion_bench/method/wudi/wudi.py,sha256=HL3Y0MPjozp7NML_UNjIWWPbQDQxYH_WG_BuyripeBQ,3602
|
|
248
251
|
fusion_bench/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
252
|
fusion_bench/metrics/continual_learning/__init__.py,sha256=f-mkv4SpXTq5kiQVHbe2g0IPf4yLFgu1Dw7g2DOK6T4,57
|
|
250
253
|
fusion_bench/metrics/continual_learning/backward_transfer.py,sha256=LCMWFFmBgWv7UIAJqiTaSvVvanx4qjnXIGuCMYvzmtc,559
|
|
@@ -262,10 +265,10 @@ fusion_bench/mixins/__init__.py,sha256=yjRvcB9Mn-c0g8tXmoBf2Dn8gyc-Na6dyhc4r674a
|
|
|
262
265
|
fusion_bench/mixins/clip_classification.py,sha256=8dqJuI3AVetFZKuzTp1SR2kGQ-vGvfbcmwfnzuUiwfI,10096
|
|
263
266
|
fusion_bench/mixins/fabric_training.py,sha256=ZmycEhCaNCgVi5oM9m0q6msxgk3quowmFvDAcvskFrg,13017
|
|
264
267
|
fusion_bench/mixins/hydra_config.py,sha256=rfT-XPUKV_U3nvuTVsKLmSmEiieoSIsbhxE5_-E0er0,5508
|
|
265
|
-
fusion_bench/mixins/lightning_fabric.py,sha256=
|
|
268
|
+
fusion_bench/mixins/lightning_fabric.py,sha256=5iamAL7YV6lEm_-8NuzFjfIy1vslwKthSpCSWLLhlCM,7506
|
|
266
269
|
fusion_bench/mixins/openclip_classification.py,sha256=O45HzgLXNvlQr5RVpfIGsYdIQ0tY5g_68KB0MTqsZWU,290
|
|
267
270
|
fusion_bench/mixins/rich_live.py,sha256=j7wNgrgwfdpl6nCXZGF_2DLtNq2aqCb_52Qhe9QSltc,495
|
|
268
|
-
fusion_bench/mixins/serialization.py,sha256=
|
|
271
|
+
fusion_bench/mixins/serialization.py,sha256=z73Mmq952TIdPwwZ8cRdl3n0_uc9lqylFI9fxKesREs,13260
|
|
269
272
|
fusion_bench/mixins/simple_profiler.py,sha256=czWMl6p9PoxbQ5A8Uifwleaq5QPGEn0qMc8MXu9dSZM,2200
|
|
270
273
|
fusion_bench/mixins/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
271
274
|
fusion_bench/mixins/optim/adamw_with_warmup.py,sha256=qTnRl8GVVIfaplOFBHnJFuZUbxPZRWRGHGNzm_EDhDE,1421
|
|
@@ -277,7 +280,7 @@ fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RP
|
|
|
277
280
|
fusion_bench/modelpool/lazy_state_dict_pool.py,sha256=HtEA85rqSCHfsIddI5sKDcZf5kSuHNwrb8fF1TUSTr0,652
|
|
278
281
|
fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
|
|
279
282
|
fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
|
|
280
|
-
fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=
|
|
283
|
+
fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=gpUQqxZIuKoaQ-gvdPsLVxI7UifueR6k3YzbUV1i0lk,19902
|
|
281
284
|
fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
|
|
282
285
|
fusion_bench/modelpool/clip_vision/modelpool.py,sha256=e5t9olRMOj_SyGVy-gqn7RwC5FAqxNsJDongWIv2KFY,7108
|
|
283
286
|
fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
|
|
@@ -288,8 +291,8 @@ fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=_VB9nlR_gm6IEXNM
|
|
|
288
291
|
fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
|
|
289
292
|
fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=t9wXHFwa7V2XC3ajxt4_bSsxMTDKW4nebvdxhG7VeLM,3435
|
|
290
293
|
fusion_bench/models/__init__.py,sha256=LeLQw2Yphu4QKZxjws_7MCM50XvFP1rTrvJ_2SR5zIA,271
|
|
291
|
-
fusion_bench/models/hf_clip.py,sha256=
|
|
292
|
-
fusion_bench/models/hf_utils.py,sha256=
|
|
294
|
+
fusion_bench/models/hf_clip.py,sha256=lL4LxbdwC_rDWRozdEJmRlzKaNcQMpWwCSMDE0tfZRM,7525
|
|
295
|
+
fusion_bench/models/hf_utils.py,sha256=bfB3QAUqsG-TyUeOWrZt8V7GeWDhp-fKg3P0J3D_TbQ,5497
|
|
293
296
|
fusion_bench/models/parameter_dict.py,sha256=HCkTJCz23pYN1_Hhegx8gglOtrnzVKJPMeg9_rUhe18,3630
|
|
294
297
|
fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
|
|
295
298
|
fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
|
|
@@ -318,7 +321,7 @@ fusion_bench/models/llama/model_utils/mod.py,sha256=xzNOgTRfOK9q8kml4Q2nmSOl23f3
|
|
|
318
321
|
fusion_bench/models/llama/model_utils/visual.py,sha256=wpqWqEASyA7WhJLCfC26h0Cdn5CXnwC1qPJUlSXggo4,8310
|
|
319
322
|
fusion_bench/models/masks/__init__.py,sha256=vXG6jrBkDbPsnrX6nMEYAW1rQuGEWDgdjID7cKzXvrs,69
|
|
320
323
|
fusion_bench/models/masks/mask_model.py,sha256=YXNZ_CGp6VPshZH__Znh6Z07BqOK53G-Ltc1LVy1E3I,5502
|
|
321
|
-
fusion_bench/models/model_card_templates/default.md,sha256=
|
|
324
|
+
fusion_bench/models/model_card_templates/default.md,sha256=DJXwDODCsqIOhkgP57-iCShxLYK_jnsDsJYH1GfbBY8,1028
|
|
322
325
|
fusion_bench/models/modeling_deepseek_v2/__init__.py,sha256=trXrhtKb_gIxXVo7wSZ-il5sLJtDTiNZezRrEt3M8zM,505
|
|
323
326
|
fusion_bench/models/modeling_deepseek_v2/configuration_deepseek.py,sha256=TblFOCfNwaXUnXnD-sxFhSn5Df-_yy2LMcrth-sBPFI,10301
|
|
324
327
|
fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py,sha256=PtfkfPrfmQVoLiVhgqlp5toJAnCinPWfeZYeJJtWWBs,78676
|
|
@@ -362,7 +365,7 @@ fusion_bench/models/smile_moe/utils/svd_utils.py,sha256=A2u7lH5Bo2qhgwplHPAz56pd
|
|
|
362
365
|
fusion_bench/models/surgery/__init__.py,sha256=tcUSi2m9GzGWfvRDQScIbdEbFBS_35gm9zGKN7VpE70,53
|
|
363
366
|
fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNkEiPwNrN11ydyQQ1EZHehM,5133
|
|
364
367
|
fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
365
|
-
fusion_bench/models/wrappers/ensemble.py,sha256=
|
|
368
|
+
fusion_bench/models/wrappers/ensemble.py,sha256=T-DAKrAm-ciZwV6Hbt8uASbjtoQpHTlvVyan3rhk_8k,11632
|
|
366
369
|
fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=A7LjG0inL5oeEVOkJwEUDM15v4dpQnsCq2y9zA78R3k,11198
|
|
367
370
|
fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py,sha256=q5Hc4BtLpAawMbxsWJRL-8OR-x7994Jhr9IyN7vKZ9o,16930
|
|
368
371
|
fusion_bench/models/wrappers/task_wise_fusion.py,sha256=ROLANdDq0bZ3sIROqIv3udPN8lzDdEwxD0Jonx-5ycw,17465
|
|
@@ -377,7 +380,7 @@ fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31Asmvwv
|
|
|
377
380
|
fusion_bench/programs/base_program.py,sha256=Bl_bv8SawEUc-GBTtZFMoii0y-r-0hOXBAJkQFexWCU,3475
|
|
378
381
|
fusion_bench/programs/fabric_fusion_program.py,sha256=jt0_tlg37a2jBl2YikaC0N71Gmr4J340wkKAekyT180,12453
|
|
379
382
|
fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
380
|
-
fusion_bench/scripts/cli.py,sha256=
|
|
383
|
+
fusion_bench/scripts/cli.py,sha256=kEWLEkZEBqUr1_-XTePzNC5NM8lwWvgUBf0Lcuk_FI8,2739
|
|
381
384
|
fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
|
|
382
385
|
fusion_bench/scripts/nyuv2_mtl_train.py,sha256=W1C45R9NdF4O-UjCx1bUxRTdFE0-FlRpwJHZ5gY18rI,3602
|
|
383
386
|
fusion_bench/scripts/webui.py,sha256=ryA-2leSnHcYA88tTAYzJGDhiljbi0vl1Fibejzndlw,14398
|
|
@@ -392,7 +395,7 @@ fusion_bench/taskpool/clip_vision/__init__.py,sha256=ItdyWYy2A5xQKzh1dXi9kbQTBig
|
|
|
392
395
|
fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py,sha256=t_lmo8W-ZgLLOiBnF5CWfaLbKwz3EXfO8gCavI34qQY,3733
|
|
393
396
|
fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py,sha256=UdI7npI53LjPV2B19tHymhbma6WYcZIvzhqaSyZKkSQ,4762
|
|
394
397
|
fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py,sha256=8lZIG6tWpctYzme0Q_n6QcGnn9MeDmP3UX8nEv4_a9Q,4232
|
|
395
|
-
fusion_bench/taskpool/clip_vision/taskpool.py,sha256=
|
|
398
|
+
fusion_bench/taskpool/clip_vision/taskpool.py,sha256=99F8w_e4-UnoeDkSjo0z_8Wstx6e635h0IqSdtfT7ms,16460
|
|
396
399
|
fusion_bench/taskpool/clip_vision/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
397
400
|
fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py,sha256=LY9wxWCm_4X7Ii0ZkMxhtbevz6OxS3Bkqz0puXhuRqM,2393
|
|
398
401
|
fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
|
|
@@ -442,7 +445,7 @@ fusion_bench/utils/__init__.py,sha256=wNAfpP-u_-8HGbLaBoHT_wriU_cNvY4M_UXdBv2kXh
|
|
|
442
445
|
fusion_bench/utils/auto.py,sha256=uACQLE62_kNyhl4BGduvcbyeTE61qXpIJx3Ccl8kh68,920
|
|
443
446
|
fusion_bench/utils/cache_utils.py,sha256=-bTZijQgl4BuAx0VSJFD-bSDOXuq3o0NkrOaiLiyofU,4795
|
|
444
447
|
fusion_bench/utils/data.py,sha256=aalB3kGbZUF-PZ_IaAhcXanRKhS-RNMT5mUrEBb4R3E,6722
|
|
445
|
-
fusion_bench/utils/devices.py,sha256=
|
|
448
|
+
fusion_bench/utils/devices.py,sha256=6AkGcs3flt0FSo9yfEREuehoTrgcc65gkwpTWQy8XsI,9546
|
|
446
449
|
fusion_bench/utils/dict.py,sha256=ZCK0CRRT_B1Z18WY_GOYcmth7k5x9Jn1k7XhAVWRu98,1379
|
|
447
450
|
fusion_bench/utils/dtype.py,sha256=z6UlPGF9dzG4Ik8rXGf59PJk_RKzG6Trp8O6wcBS9PU,4360
|
|
448
451
|
fusion_bench/utils/expr.py,sha256=zwHNrtIbOMnIChU-0ZI5qLbDva8zvHbizL-4F2TwM14,2386
|
|
@@ -452,14 +455,14 @@ fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqk
|
|
|
452
455
|
fusion_bench/utils/instantiate_utils.py,sha256=OXkfhq_o3Sgy5n3Psf-HI-dIfbK9oD2GBdfcx3gT63Q,17526
|
|
453
456
|
fusion_bench/utils/json.py,sha256=sVCqbm9mmyHybiui-O57KFt_ULrjLtN2wipSo6VDvqE,2533
|
|
454
457
|
fusion_bench/utils/lazy_imports.py,sha256=dg4Uu8FaoEu0WGVTo5o_PbLZs3Ei_RG75Ta-Us1iPW4,3500
|
|
455
|
-
fusion_bench/utils/lazy_state_dict.py,sha256=
|
|
458
|
+
fusion_bench/utils/lazy_state_dict.py,sha256=srEKyctbuBW3yrVFSG7Tki_XkBwoc6eUmDXLxHXqX0o,20328
|
|
456
459
|
fusion_bench/utils/misc.py,sha256=93q0m-HYWkPK91Co5lll_J0Dxs6YahW2lD_X8fUAyTk,2420
|
|
457
460
|
fusion_bench/utils/modelscope.py,sha256=P8fV6Eff8oP0LVGIFGbLvuk8MBteysN438djZ6ZEfE4,10699
|
|
458
461
|
fusion_bench/utils/packages.py,sha256=wKl-qtPjA61LrdgTTusuNyvs8jfUv4mA5IwPTFWyYtA,2139
|
|
459
462
|
fusion_bench/utils/parameters.py,sha256=ufEDOYJwcQQxLfveK8hBAGwpu5J3LA_cTWiDgZ2zkJ0,11788
|
|
460
463
|
fusion_bench/utils/path.py,sha256=qrfgar3b-6_2v032-2hTt97L6qdtG7zc3CFrGFyKSGE,2400
|
|
461
464
|
fusion_bench/utils/pylogger.py,sha256=r2KXTvq-j8uHdjBBoVPOgkjv4c6pyhbX6xf1JbOsF4w,3335
|
|
462
|
-
fusion_bench/utils/rich_utils.py,sha256=
|
|
465
|
+
fusion_bench/utils/rich_utils.py,sha256=24RF-OHK6h9ggZ95csw_vMU8YtxYNOxlzjcH7dpuESY,5863
|
|
463
466
|
fusion_bench/utils/set.py,sha256=_43ZvGKJ_BK9sUslsSNhi7xEfuAQuyj3vViImnGpnCY,134
|
|
464
467
|
fusion_bench/utils/state_dict_arithmetic.py,sha256=fczHDEpL2_UmxNIdvQtllXvBWBcmKpw-p6CIS_upjwI,11818
|
|
465
468
|
fusion_bench/utils/tensorboard.py,sha256=9fkgNYR9LM38nPNkudcxL9TjLUseW-280M0k2nLff7o,1669
|
|
@@ -472,7 +475,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
|
|
|
472
475
|
fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
|
|
473
476
|
fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
|
|
474
477
|
fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
|
|
475
|
-
fusion_bench-0.2.
|
|
478
|
+
fusion_bench-0.2.24.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
|
|
476
479
|
fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
|
|
477
480
|
fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
|
|
478
481
|
fusion_bench_config/fabric_model_fusion.yaml,sha256=U8BxsaOvsg9bsEZcIpBE-feo9n9G7Y1kQDHqPVxUYAg,2601
|
|
@@ -621,7 +624,7 @@ fusion_bench_config/method/dare/ties_merging.yaml,sha256=7gDW4XpezrsccsbJGqqKrbX
|
|
|
621
624
|
fusion_bench_config/method/dawe/dawe_for_clip.yaml,sha256=99P5xpp1YGvIwXGxDcxRtJMLE2FhvEFmFBQjOMEcGoc,1023
|
|
622
625
|
fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=CtZI3YPMJNDy225yhOJbSiMKlsc-X5nCFzmVh0dvr-w,78
|
|
623
626
|
fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=khdpCvKMNytx4nZSgtUJFXv44MVytXu0aqUVd9TixXo,57
|
|
624
|
-
fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=
|
|
627
|
+
fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=RKa3IgN3DfFZVmeXVIdTt0NdPVV0jFkpQz6SxLs3Kso,124
|
|
625
628
|
fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=2KD3PjFglqL7fjqhjXtOWxZ1mvmYodiNVroXsFd7EGE,261
|
|
626
629
|
fusion_bench_config/method/expert_sparsity/README.md,sha256=CLE0-XblXDWCUTHPaTNtBH-YquXn-uawwTJiYrgjMaA,239
|
|
627
630
|
fusion_bench_config/method/expert_sparsity/mixtral.yaml,sha256=maFL3LM0zfnQ1eXoNXUslSjgZmpOdUJgl_a31dYUBbc,605
|
|
@@ -638,8 +641,9 @@ fusion_bench_config/method/linear/expo.yaml,sha256=St3NW6cKVRV3vCn8y0gxQ8k66VTdt
|
|
|
638
641
|
fusion_bench_config/method/linear/linear_interpolation.yaml,sha256=chM6_HRKKcMleTeuKY3-YNI1qaMG2CfnsRwUxAlHsRw,66
|
|
639
642
|
fusion_bench_config/method/linear/llama_expo.yaml,sha256=SvqamjT06BMObQ58sks5x7Wv6kGpp3-Nlw3ihbD_kSA,621
|
|
640
643
|
fusion_bench_config/method/linear/llama_expo_with_dare.yaml,sha256=Pp8s2xmEg5XSvaGKtwTYx_PzcGvwRh2gPpZ6u9as4_E,383
|
|
641
|
-
fusion_bench_config/method/linear/
|
|
642
|
-
fusion_bench_config/method/linear/
|
|
644
|
+
fusion_bench_config/method/linear/simple_average_for_causallm.yaml,sha256=qqeIr61PJEcfZclZ5vV64GCzyt-8b1zB0FDZu8DsbXQ,322
|
|
645
|
+
fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml,sha256=tJA0n0_XVvll4rZYVHQVqFCz8W3Bey6NjPKMIH3-P0U,142
|
|
646
|
+
fusion_bench_config/method/linear/ties_merging_for_causallm.yaml,sha256=1oEIdxV0OqWjDQ9V_lmXEPUayp4KbKHE2SvpCLmiKOU,489
|
|
643
647
|
fusion_bench_config/method/linear/weighted_average.yaml,sha256=uq2gHGCwVHHSa1H-hzcrSlumUTLJ50tfyiY1Mh1pFsk,186
|
|
644
648
|
fusion_bench_config/method/linear/weighted_average_for_llama.yaml,sha256=se2aq6t5R1f-ZG6ubUyRr__DBe9BzXrgL81ua3DkQoM,498
|
|
645
649
|
fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml,sha256=QHsRfJK9K4KajsX3LBHG8cDt7ZLJWxOBnJjpHRQSB_s,1348
|
|
@@ -686,6 +690,7 @@ fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=-Ipc05T
|
|
|
686
690
|
fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml,sha256=KIKUr_Q4e9pJSVlqUFatuLp5vg8kNEsn8tOE4R77sxA,653
|
|
687
691
|
fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=mMVaFJWUZmIdhg0kVQY20i7cmgTMrOSgoSpmW7quRzc,993
|
|
688
692
|
fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
|
|
693
|
+
fusion_bench_config/method/wudi/wudi.yaml,sha256=3mJ6-XKHwwHALS3d503ybGM7pc1PhEK91YwwMybuzMc,76
|
|
689
694
|
fusion_bench_config/model/clip-vit/README.md,sha256=-s34C9X7pxy55xSc24kbf-4ctK7UC-Wpu_JWIe9O0Ko,1382
|
|
690
695
|
fusion_bench_config/model/clip-vit/clip-vit-base-patch16.yaml,sha256=Fn7or7-5fVZNyp6fH1lkwk7mq7iVhpR3sMt6Sm7Yg6I,43
|
|
691
696
|
fusion_bench_config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml,sha256=8G2OCCDaSJkzDOMDsV08NE-Z5YWMjDsFVs1WY3OWNss,787
|
|
@@ -841,9 +846,11 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_
|
|
|
841
846
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml,sha256=FuPWQbC9xEV5wZjuo835gOMNgbzmpK9RbjFjA_HOzqo,2476
|
|
842
847
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml,sha256=9PCkbrNnQSKTsm4eoUvVgjGd3IY7wHBC4LWj4kOdY4Y,1406
|
|
843
848
|
fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=bqnyzgwIvDtV3Fb-uLf9mdFv0NW1C392lxGsGUPLsKE,400
|
|
844
|
-
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.
|
|
849
|
+
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_code.yaml,sha256=4DoMFlGabtwZXZMGWsWtkP2rlGOx_1eEPp_AyqyVln0,263
|
|
850
|
+
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml,sha256=ofFFVYKHKtylxd90REMLhhP57Yqwe2AEbGuZ0mBCVz8,305
|
|
845
851
|
fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml,sha256=Nxk72MurqSzEyPJzGoKFbk5T2TGWBwYpH2V9Jzqt648,229
|
|
846
852
|
fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml,sha256=8gr8ZtgegSHV0GHtJBiEgdYbRe8UHhO4_y8dayxZChk,506
|
|
853
|
+
fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml,sha256=mm7A3NilcANJBuCZMt3MMLKFm7CjBhMYWAa9TXjM_PQ,326
|
|
847
854
|
fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=oDsZkuAoh1mWUC7jZNzw8794zgX2bV5Z0esXpvbTs-c,643
|
|
848
855
|
fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk3_bQiciMRlNsO5vp6AKHQM_-g-8bmU8251w,641
|
|
849
856
|
fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=H3UwSk4ChsGSrH49LuttxldFURW-4RVUtnIa0ClHKXo,802
|
|
@@ -927,8 +934,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=3q-KMuFaM
|
|
|
927
934
|
fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
|
|
928
935
|
fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
|
|
929
936
|
fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
|
|
930
|
-
fusion_bench-0.2.
|
|
931
|
-
fusion_bench-0.2.
|
|
932
|
-
fusion_bench-0.2.
|
|
933
|
-
fusion_bench-0.2.
|
|
934
|
-
fusion_bench-0.2.
|
|
937
|
+
fusion_bench-0.2.24.dist-info/METADATA,sha256=DllRpMnvVgyeqjN_YlNeo7IlqukzOjuYO_cWopOo1tA,22621
|
|
938
|
+
fusion_bench-0.2.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
939
|
+
fusion_bench-0.2.24.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
|
|
940
|
+
fusion_bench-0.2.24.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
|
|
941
|
+
fusion_bench-0.2.24.dist-info/RECORD,,
|
fusion_bench_config/method/linear/{simple_average_for_llama.yaml → simple_average_for_causallm.yaml}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
_target_: fusion_bench.method.
|
|
1
|
+
_target_: fusion_bench.method.SimpleAverageForCausalLM
|
|
2
2
|
# set `merge_backbone` to true if you have a base model and only want to merge the backbone of the experts
|
|
3
3
|
# if `merge_backbone` is False, this is equivalent to `SimpleAverageAlgorithm`
|
|
4
4
|
merge_backbone: false
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
_target_: fusion_bench.method.TiesMergingForCausalLM
|
|
2
|
+
# Scaling factor $\lambda$
|
|
3
|
+
scaling_factor: 0.3
|
|
4
|
+
# Threshold for resetting values in the task vector
|
|
5
|
+
threshold: 20
|
|
6
|
+
# List of keys to remove from the state dict, default is empty
|
|
7
|
+
remove_keys: []
|
|
8
|
+
# Function to merge the models, default is sum. Options are 'sum', 'mean', and 'max'
|
|
9
|
+
merge_func: sum
|
|
10
|
+
# Whether to merge only the backbone layers
|
|
11
|
+
merge_backbone: false
|
|
12
|
+
# Path to save the merged model
|
|
13
|
+
model_save_path: ${path.log_dir}/checkpoint
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
_target_: fusion_bench.modelpool.CausalLMPool
|
|
2
|
+
_recursive_: false
|
|
3
|
+
enable_lazy_loading: true
|
|
4
|
+
models:
|
|
5
|
+
_pretrained_: Qwen/Qwen2.5-1.5B
|
|
6
|
+
math: Qwen/Qwen2.5-Math-1.5B
|
|
7
|
+
code: Qwen/Qwen2.5-Coder-1.5B
|
|
8
|
+
instruction: Qwen/Qwen2.5-1.5B-Instruct
|
|
9
|
+
model_kwargs:
|
|
10
|
+
torch_dtype: bfloat16
|
|
11
|
+
tokenizer: Qwen/Qwen2.5-1.5B
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
_target_: fusion_bench.modelpool.CausalLMPool
|
|
2
|
+
_recursive_: false
|
|
3
|
+
enable_lazy_loading: true
|
|
4
|
+
models:
|
|
5
|
+
_pretrained_: meta-llama/Llama-2-7b-hf
|
|
6
|
+
chat: meta-llama/Llama-2-7b-chat-hf
|
|
7
|
+
math: WizardLMTeam/WizardMath-7B-V1.0
|
|
8
|
+
code: codellama/CodeLlama-7b-hf
|
|
9
|
+
model_kwargs:
|
|
10
|
+
torch_dtype: bfloat16
|
|
11
|
+
tokenizer: meta-llama/Llama-2-7b-hf
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|