fusion-bench 0.2.13__py3-none-any.whl → 0.2.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/method/ada_svd/clip_vision.py +4 -1
- fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +46 -145
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +229 -0
- fusion_bench/method/smile_upscaling/smile_upscaling.py +6 -336
- fusion_bench/modelpool/causal_lm/causal_lm.py +73 -10
- fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +2 -203
- fusion_bench/models/modeling_smile_qwen2/__init__.py +8 -0
- fusion_bench/models/modeling_smile_qwen2/configuration_smile_qwen2.py +21 -0
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +922 -0
- fusion_bench/models/modeling_smile_qwen2/register.py +11 -0
- fusion_bench/models/rankone_moe.py +2 -88
- fusion_bench/models/smile_moe/linear_from_hf_config.py +373 -0
- fusion_bench/models/smile_moe/{linear.py → linear_from_module.py} +103 -33
- fusion_bench/models/smile_moe/utils/__init__.py +24 -0
- fusion_bench/models/smile_moe/utils/svd_utils.py +46 -0
- fusion_bench/taskpool/__init__.py +2 -0
- fusion_bench/taskpool/lm_eval_harness/__init__.py +3 -0
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +87 -0
- {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/METADATA +26 -3
- {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/RECORD +36 -15
- {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/WHEEL +1 -1
- fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -2
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +13 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B-Instruct.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B-Instruct.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml +11 -0
- fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +17 -0
- fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml +12 -0
- {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/top_level.txt +0 -0

fusion_bench/models/smile_moe/utils/__init__.py

@@ -0,0 +1,24 @@
+from typing import List
+
+import torch
+from torch import Tensor
+
+from .svd_utils import svd
+
+__all__ = ["svd_utils", "_is_all_zeros"]
+
+
+def _is_all_zeros(tensor: Tensor | List[Tensor]) -> bool:
+    """
+    Check if a tensor or a list of tensors are all zeros.
+
+    Args:
+        tensor (Tensor | List[Tensor]): A tensor or a list of tensors.
+
+    Returns:
+        bool: True if all elements are zeros, False otherwise.
+    """
+    if isinstance(tensor, Tensor):
+        return torch.allclose(tensor, torch.zeros_like(tensor))
+    else:
+        return all(_is_all_zeros(t) for t in tensor)
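
A quick sketch of how the new helper behaves (import path taken from the file above; the tensors are made up for illustration):

```python
import torch

from fusion_bench.models.smile_moe.utils import _is_all_zeros

zero = torch.zeros(4, 4)
noise = torch.randn(4, 4)

_is_all_zeros(zero)           # True
_is_all_zeros([zero, zero])   # True: recurses over the list
_is_all_zeros([zero, noise])  # False: one member is non-zero
```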

fusion_bench/models/smile_moe/utils/svd_utils.py

@@ -0,0 +1,46 @@
+from typing import Optional, Tuple, Union
+
+import torch
+from torch import Tensor
+
+
+def _svd(w: Tensor, full_matrices: bool = True) -> Tuple[Tensor, Tensor, Tensor]:
+    """
+    Perform Singular Value Decomposition (SVD) on a tensor.
+
+    Args:
+        w (Tensor): The input tensor.
+        full_matrices (bool): Whether to compute the full-sized U and V matrices.
+
+    Returns:
+        Tuple[Tensor, Tensor, Tensor]: The U, S, and V matrices from SVD.
+    """
+    u, s, vh = torch.linalg.svd(
+        w, full_matrices=full_matrices, driver="gesvd" if w.is_cuda else None
+    )
+    v = vh.T
+    return u, s, v
+
+
+def svd(
+    w: Tensor,
+    full_matrices: bool = True,
+    accelerator: Optional[Union[torch.device, str]] = None,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    """
+    Perform SVD on a tensor, optionally using a specified accelerator.
+
+    Args:
+        w (Tensor): The input tensor.
+        full_matrices (bool): Whether to compute the full-sized U and V matrices.
+        accelerator (Optional[Union[torch.device, str]]): The device to perform the computation on.
+
+    Returns:
+        Tuple[Tensor, Tensor, Tensor]: The U, S, and V matrices from SVD.
+    """
+    if accelerator is None:
+        return _svd(w, full_matrices=full_matrices)
+    original_device = w.device
+    w = w.to(accelerator)
+    u, s, v = _svd(w)
+    return u.to(original_device), s.to(original_device), v.to(original_device)
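
A short sketch of the offload pattern this helper implements: the input can stay on CPU, the decomposition optionally runs on an accelerator, and the factors come back on the original device. The shapes and tolerance below are illustrative assumptions; note that when `accelerator` is set, `_svd` is called with its default `full_matrices=True`.

```python
import torch

from fusion_bench.models.smile_moe.utils import svd

# e.g. a linear-layer weight pulled from a CPU checkpoint
w = torch.randn(256, 128)

# run the SVD on CUDA when available; factors are returned on w's device
accelerator = "cuda" if torch.cuda.is_available() else None
u, s, v = svd(w, accelerator=accelerator)

# with full matrices, only the leading columns of U carry the spectrum
k = s.shape[0]
w_rebuilt = u[:, :k] @ torch.diag(s) @ v.T
assert torch.allclose(w_rebuilt, w, atol=1e-4)
assert u.device == w.device
```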

fusion_bench/taskpool/__init__.py

@@ -15,6 +15,7 @@ _import_structure = {
     "dummy": ["DummyTaskPool"],
     "gpt2_text_classification": ["GPT2TextClassificationTaskPool"],
     "llama": ["LlamaTestGenerationTaskPool"],
+    "lm_eval_harness": ["LMEvalHarnessTaskPool"],
     "nyuv2_taskpool": ["NYUv2TaskPool"],
     "openclip_vision": ["OpenCLIPVisionModelTaskPool"],
 }
@@ -30,6 +31,7 @@ if TYPE_CHECKING:
     from .dummy import DummyTaskPool
     from .gpt2_text_classification import GPT2TextClassificationTaskPool
     from .llama import LlamaTestGenerationTaskPool
+    from .lm_eval_harness import LMEvalHarnessTaskPool
     from .nyuv2_taskpool import NYUv2TaskPool
     from .openclip_vision import OpenCLIPVisionModelTaskPool

fusion_bench/taskpool/lm_eval_harness/taskpool.py

@@ -0,0 +1,87 @@
+import logging
+import os
+from typing import List, Literal, Optional, Union, TYPE_CHECKING
+
+import lightning.fabric
+import lm_eval
+import lm_eval.models
+from lm_eval.__main__ import check_argument_types, cli_evaluate, setup_parser
+from omegaconf import DictConfig, ListConfig
+
+from fusion_bench import BaseTaskPool
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.utils.strenum import _version
+
+
+log = logging.getLogger(__name__)
+
+
+class LMEvalHarnessTaskPool(BaseTaskPool, LightningFabricMixin):
+    def __init__(
+        self,
+        tasks: Union[str, List[str]],
+        apply_chat_template: bool = False,
+        include_path: Optional[str] = None,
+        batch_size: int = 1,
+        metadata: Optional[DictConfig] = None,
+        verbosity: Optional[
+            Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
+        ] = None,
+        output_path: Optional[str] = None,
+        log_samples: bool = False,
+        _usage_: Optional[str] = None,
+        _version_: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(_usage_=_usage_, _version_=_version_)
+        self.tasks = tasks
+        self.include_path = include_path
+        self.batch_size = batch_size
+        self.metadata = metadata
+        self.apply_chat_template = apply_chat_template
+        self.verbosity = verbosity
+        self.kwargs = kwargs
+        self.output_path = output_path
+        self.log_samples = log_samples
+
+    def evaluate(self, model, *command_line_args, **kwargs):
+        command_line_args = []
+        if self.include_path is not None:
+            command_line_args.extend(["--include_path", self.include_path])
+        if isinstance(self.tasks, (list, ListConfig)):
+            command_line_args.extend(["--tasks", ",".join(self.tasks)])
+        elif isinstance(self.tasks, str):
+            command_line_args.extend(["--tasks", self.tasks])
+        if self.apply_chat_template:
+            command_line_args.extend(
+                ["--apply_chat_template", str(self.apply_chat_template)]
+            )
+        if self.batch_size is not None:
+            command_line_args.extend(["--batch_size", str(self.batch_size)])
+        if self.verbosity is not None:
+            command_line_args.extend(["--verbosity", str(self.verbosity)])
+        if self.metadata is not None:
+            command_line_args.extend(["--metadata", str(self.metadata)])
+        if self.output_path is None:
+            command_line_args.extend(
+                [
+                    "--output_path",
+                    os.path.join(self.log_dir, "lm_eval_results"),
+                ]
+            )
+        else:
+            command_line_args.extend(["--output_path", self.output_path])
+        if self.log_samples:
+            command_line_args.extend(["--log_samples"])
+        for key, value in kwargs.items():
+            command_line_args.extend([f"--{key}", str(value)])
+
+        parser = setup_parser()
+        check_argument_types(parser)
+        args = parser.parse_args(args=command_line_args)
+        log.info("LM-Eval Harness arguments:\n%s", args)
+
+        if not lightning.fabric.is_wrapped(model):
+            model = self.fabric.setup(model)
+        args.model = lm_eval.models.huggingface.HFLM(pretrained=model)
+        cli_evaluate(args)
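
A minimal sketch of driving the new task pool directly from Python. The model name and task list are placeholders, and direct instantiation outside a Hydra-configured fusion_bench program is assumed to work as the signature above suggests; `output_path` is set explicitly to avoid relying on the Fabric-managed `log_dir` default.

```python
from transformers import AutoModelForCausalLM

from fusion_bench.taskpool import LMEvalHarnessTaskPool

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B")  # placeholder model

taskpool = LMEvalHarnessTaskPool(
    tasks=["truthfulqa"],  # forwarded to lm-eval as "--tasks truthfulqa"
    batch_size=8,
    output_path="outputs/lm_eval_results",
)
taskpool.evaluate(model)  # wraps the model in HFLM and calls lm-eval's cli_evaluate
```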

{fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.
+Version: 0.2.15
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -45,6 +45,8 @@ Requires-Dist: rich
 Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
+Provides-Extra: lm-eval-harness
+Requires-Dist: lm-eval; extra == "lm-eval-harness"
 Dynamic: license-file
 
 <div align='center'>
@@ -61,7 +63,7 @@ Dynamic: license-file
 
 </div>
 
-> [!TIP]
+> [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
 
 ## Overview
@@ -122,7 +124,7 @@ Merging multiple expert models offers a promising approach for performing multi-
 
 ## Installation
 
-
+Install from PyPI:
 
 ```bash
 pip install fusion-bench
@@ -137,6 +139,27 @@ cd fusion_bench
 pip install -e . # install the package in editable mode
 ```
 
+### Install with [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
+
+[](https://doi.org/10.5281/zenodo.10256836)
+
+
+```bash
+pip install "fusion-bench[lm-eval-harness]"
+```
+
+or install from local directory
+
+```bash
+pip install -e ".[lm-eval-harness]"
+```
+
+This will install the latest version of fusion-bench and the dependencies required for LM-Eval Harness.
+Documentation for using LM-Eval Harness within FusionBench framework can be found at [this online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
+
+> [!TIP]
+> Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/causal_lm) or in the [`docs/modelpool/causal_lm.md`](docs/modelpool/causal_lm.md) markdown file.
+
 ## Introduction to Deep Model Fusion
 
 Deep model fusion is a technique that merges, ensemble, or fuse multiple deep neural networks to obtain a unified model.

{fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/RECORD

@@ -49,7 +49,7 @@ fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAc
 fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
 fusion_bench/method/simple_average.py,sha256=2ghcL1E-eLbIYDCHYCoR9WtiYSb1GvFAH163OTTTEEI,4481
 fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
-fusion_bench/method/ada_svd/clip_vision.py,sha256=
+fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
 fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
 fusion_bench/method/adamerging/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
 fusion_bench/method/adamerging/clip_task_wise_adamerging.py,sha256=rREVf8SKlQ9SiWdUWOYo91b1RW9PnNJxsW8MxHs_MUo,6095
@@ -157,8 +157,9 @@ fusion_bench/method/slerp/slerp.py,sha256=2_n10REnRoV5DuwCC0bDX8RM3MLL4Q_5rZiU0h
 fusion_bench/method/slerp/slerp_utils.py,sha256=vksRo6n7FqY7By9aqbwTL4XV3BjcU_GrUl_r85Kpfjc,3504
 fusion_bench/method/smile_upscaling/__init__.py,sha256=6ZpUSHUFVsT1U7V3TIDWBFqcHte7SjHW0wp6CAE8NVg,165
 fusion_bench/method/smile_upscaling/singular_projection_merging.py,sha256=0neZS9oZnl64wu1xb9ruGB7lbhYXyy4zj8l3E1QYRGQ,6670
-fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py,sha256=
-fusion_bench/method/smile_upscaling/
+fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py,sha256=T1M4hTRgIfDhy4zSccREPLajgMcdnohr0NtdWXtPzmA,8802
+fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py,sha256=gRo5TKhcFhclKtpI75ixc65YrhoW3kyVr_q-JrCN8n8,8713
+fusion_bench/method/smile_upscaling/smile_upscaling.py,sha256=epkurTJQdO2TlWf1v5qUSiF1Pi1Umz-uQDtHM_iX80c,8978
 fusion_bench/method/sparse_we_moe/__init__.py,sha256=V5VOpLwn6ZpsM09TmwFhhlJwMTBFXF7NE1-gW1MlAfc,133
 fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py,sha256=J8iVYks-SQ93dqh6FUQACvSmM364QHlVBYMKOCPbHrU,10288
 fusion_bench/method/sparse_we_moe/sparse_we_moe.py,sha256=6OYgj_D_4xTtqy_guA7whQu76LQ7gv-U2cIZkXe7bIg,10479
@@ -218,7 +219,7 @@ fusion_bench/modelpool/huggingface_automodel.py,sha256=OJ6EyYyjNv1_Bhjn-zli-e__B
 fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RPE2dcepeEB3agBKkkH-xA3yMj1czw,2014
 fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
 fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
-fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=
+fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=fO8lF8YWwoe43sVVOqHW9Ike7x-924-I6QQgZqx9EgA,6505
 fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
 fusion_bench/modelpool/clip_vision/modelpool.py,sha256=JH1wLdWefvE242SYpXTnoSLkKX-YcadnidWd2bo8tWQ,5486
 fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
@@ -231,7 +232,7 @@ fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIK
 fusion_bench/models/__init__.py,sha256=TNOEH_2yAQP51m9mdWepNEf9VGUZgDthtgXbs4rhb4M,100
 fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
 fusion_bench/models/parameter_dict.py,sha256=lkVaK6xInqHoQ3_N6zx8CNKH4dnf8AP8H9xAY6ds6lg,3515
-fusion_bench/models/rankone_moe.py,sha256=
+fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
 fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
 fusion_bench/models/sparse_we_moe.py,sha256=b-yIeCsl2rz0i7BP9g_fqCEam7KUNjNX_J8oyZV6MJ8,16509
 fusion_bench/models/utils.py,sha256=AQFI2UZSItKfJpG8ex74FPjn_SjsADLhvpv1GYqu43U,2065
@@ -261,8 +262,12 @@ fusion_bench/models/modeling_losparse_llama/register.py,sha256=rRTvc5hK9GvTiEZGq
 fusion_bench/models/modeling_losparse_llama/utils.py,sha256=REQIGeJeNhlKxm2Y7EUumtzj4vdTQQIT1dSiB22_i0o,1886
 fusion_bench/models/modeling_smile_mistral/__init__.py,sha256=q9DmZhBYwTOUsaWOs--tMZ-9zYpAa8KYEJPoNT7IZj0,1171
 fusion_bench/models/modeling_smile_mistral/configuration_smile_mistral.py,sha256=yt1-JBlkJmlJw7dvB4_V8M0gy5ihD8isDxcmwyW85d4,633
-fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py,sha256=
+fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py,sha256=5cN1M_XhcFCoJb8yvO1KCwHD_UH__hJg_X2D9C85R34,33128
 fusion_bench/models/modeling_smile_mistral/register.py,sha256=7nSJC4FveUi78rp53Ps6TcPGedHZ79cikYM5GIfEZfw,400
+fusion_bench/models/modeling_smile_qwen2/__init__.py,sha256=eaf9PRQ-rZRHzL2odWpR6ha3sR0rNMylrf0ZvqGTvbU,241
+fusion_bench/models/modeling_smile_qwen2/configuration_smile_qwen2.py,sha256=aekcpLcUGo4e7GkOtaxKClpIU5byyY-LQNDb-sMeyNc,621
+fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py,sha256=hfn2Tmu4h8t3xiKvUpbXpjcir-aywn7rvvsQK7vLwVA,37247
+fusion_bench/models/modeling_smile_qwen2/register.py,sha256=-6XqWWrUltFkJ76C6NCveJp4vPcqAoijJ3fItmDFsQs,391
 fusion_bench/models/nyuv2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/models/nyuv2/aspp.py,sha256=Nl-Kx9YmGp0BNpDedo9cYbynOwI4SUyILWN2VgiPDIc,2495
 fusion_bench/models/nyuv2/lightning_module.py,sha256=SLtC0yL6455uKeb-o07MR6v-xE4BTKm7B0E2ayQwEBU,5436
@@ -273,7 +278,10 @@ fusion_bench/models/open_clip/modeling.py,sha256=34wKcbxe5xb6fzAVdIz0QcsSXs-8FQF
 fusion_bench/models/open_clip/utils.py,sha256=YM_vGQSxIDoB2euHG54hhRGIcINJfR0NxNT5U42KRCw,10394
 fusion_bench/models/open_clip/variables_and_paths.py,sha256=_OBcKvZwSGvYSmgKtXOuekEJI-btW94Ia-BQ9n4isfY,1231
 fusion_bench/models/smile_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/models/smile_moe/
+fusion_bench/models/smile_moe/linear_from_hf_config.py,sha256=4vzYYjDHGOf1IO7gO0dzQC1xqcwEij9M7d4tVZm-7dY,11919
+fusion_bench/models/smile_moe/linear_from_module.py,sha256=Sv6rCj1FWr7wQhv2hO2waJIkoHly7HZCP4zbYn3Dk78,10815
+fusion_bench/models/smile_moe/utils/__init__.py,sha256=_FfU5HAQIwl9Ch8M8_VBxLRtvqk0lWr-k1WVfWg61jA,588
+fusion_bench/models/smile_moe/utils/svd_utils.py,sha256=A2u7lH5Bo2qhgwplHPAz56pdbHYUunk2PS6PSvTn19M,1407
 fusion_bench/models/surgery/__init__.py,sha256=tcUSi2m9GzGWfvRDQScIbdEbFBS_35gm9zGKN7VpE70,53
 fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNkEiPwNrN11ydyQQ1EZHehM,5133
 fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -298,7 +306,7 @@ fusion_bench/scripts/nyuv2_mtl_train.py,sha256=W1C45R9NdF4O-UjCx1bUxRTdFE0-FlRpw
 fusion_bench/scripts/webui.py,sha256=ryA-2leSnHcYA88tTAYzJGDhiljbi0vl1Fibejzndlw,14398
 fusion_bench/scripts/clip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/scripts/clip/convert_checkpoint.py,sha256=zncgRAhInFpJDSHIm3GO4F6BzgsdAQVj3LLmV7g-JiQ,1221
-fusion_bench/taskpool/__init__.py,sha256
+fusion_bench/taskpool/__init__.py,sha256=-ltXMsS3jeGxa9vnhOyrbITOUtfNjLwkGPfS2mKDOdY,1312
 fusion_bench/taskpool/base_pool.py,sha256=Cbe3ZgJ34DWSDZeZEjlgqR0b84aM1i68D9-vomaooo8,852
 fusion_bench/taskpool/dummy.py,sha256=Di9JZO3XyDYn6wAGukrJMTnkS_NaxGTeQYo_3j1JD3Y,1675
 fusion_bench/taskpool/gpt2_text_classification.py,sha256=PCNdc2SNGUFGxJ0snmwrnjTdSwmDt9fs7Pe0eDjdvaw,6091
@@ -313,6 +321,8 @@ fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py,sha256=LY9wxWC
 fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
 fusion_bench/taskpool/llama/reward_model.py,sha256=ZpRSX4esBAuE0MdTjPHjqS2TnvGb6P8arOGxBeXnq6Y,5028
 fusion_bench/taskpool/llama/test_generation.py,sha256=kJ_5GruG12FsuJHDh_S7pbQgwEojTqhGpA_wVNH5KPc,6675
+fusion_bench/taskpool/lm_eval_harness/__init__.py,sha256=_usNxe4z9avClSWjwHMxoznnI_UQFMuo7uOEJhP8jMk,81
+fusion_bench/taskpool/lm_eval_harness/taskpool.py,sha256=2eLP4FqVxtIbYe66R1ksNGpt94QOFHmH8C9UgxlWnJ8,3280
 fusion_bench/taskpool/openclip_vision/__init__.py,sha256=02p77Mb1JE7trrv2HtIku5X667WY5LZ2zVuyL3uIcyo,59
 fusion_bench/taskpool/openclip_vision/openclip_taskpool.py,sha256=PtD_Y9CWzPI3WEil_RuXtCh8ImPKcSHtZTqfybmsGdg,6875
 fusion_bench/tasks/__init__.py,sha256=Z_ePIp4Xizkj78QopLg1dZkJAN_IF73MkbR_nkfHQ9Y,52
@@ -383,7 +393,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
 fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
 fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
 fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
-fusion_bench-0.2.
+fusion_bench-0.2.15.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
 fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
 fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
 fusion_bench_config/fabric_model_fusion.yaml,sha256=5iPgaM8UOhuvBW2Hap_csst-eqlYRwb_lru8ngjrZ_g,948
@@ -555,7 +565,8 @@ fusion_bench_config/method/regmean/gpt2_regmean.yaml,sha256=n94aTboDdwSA7Tki8l_o
 fusion_bench_config/method/regmean/regmean.yaml,sha256=ZgVVLx-lHwVgjtjTl4VZUlthh8yyua87QvoJfmNHud4,101
 fusion_bench_config/method/slerp/slerp.yaml,sha256=xldDUULtfCdwzAkQUb0C8-TmbW7FqcAlIOsPX8p4n6w,116
 fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml,sha256=ZMn_ImRjjc2uozf7ocQIzbgvFDpBV7S-34KptbBXVGo,200
-fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml,sha256=
+fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml,sha256=VFMrkbO69d0wCjTQCuKysYGVe6hEwNu792g1QkhU5Mk,383
+fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml,sha256=Rdcub7yFFn-jKXrlFoj8LQk1cRbJm2do91pV-YMSzTE,378
 fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml,sha256=G88mabTTniDUtiUC9Vg3cj_sw6D05mE4_ZdyYI4Omjk,477
 fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml,sha256=1zfrT2FNmLyCivth4kzGR8Ai9jyQ87OXRbf4di4IE94,642
 fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml,sha256=UPnMt_GoMSHOeOx5Sv0oHPRoPhhvVRC5zdVA38OTwSg,636
@@ -715,8 +726,17 @@ fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk
 fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=H3UwSk4ChsGSrH49LuttxldFURW-4RVUtnIa0ClHKXo,802
 fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml,sha256=vU0q06OUa1UM_Xvp2t27Rl3F6EDgYWPnDxeyzUH-QVI,589
 fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml,sha256=MpgshGtmMXpUFRA1knjdGRVH4UgZbkkcTmCTrF3LlZk,573
+fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml,sha256=Kbpam1Hds5URMP35dXGdVibH-vTmYPh3xHMkhj6Mgtg,648
 fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml,sha256=FynhZ1PRvyzsyzrHIuMpGgQGRMlu_xI7earm-CeIVeY,824
 fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml,sha256=zQWfp7mYm6jQ8g41Eeh2d9vAbocZJ5btPX1ft9QpEZU,546
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B-Instruct.yaml,sha256=NDq_prH-b9Vw7lRjsyJIcbeF4MXVVdszxK1FPJxIJYs,453
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml,sha256=Mg_z2vnw7IkNPoMvhl_Ja6gT9tX942sqaNfjXQRzBvg,390
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B-Instruct.yaml,sha256=SfPEji6mWx9Dw48rE0B8MDrYv2NVLC-S98DK5xaU6So,453
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml,sha256=2vpOp9t8SUP2rkBw21mqwRYApkqXQiaYXcZm2oxLox4,390
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml,sha256=8up_cqEhabGeK6l6tMha9DJzsPoEIFN8bS_Kwv7LmCc,389
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml,sha256=SODG0kcnAP6yC0_J_SpSVMRV-v5qGV22gcWdiBaZo1I,368
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml,sha256=zwInWJS8yrhch4vOL1ypRKNWWpJKlhQsyY0Ln14CC-M,389
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml,sha256=ufmu4b3lyxn2XLDMVYxP-bKwYaGTjB5-JoYXLG8v8tY,368
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md,sha256=DC0HF-isCHshipHTC0Rof6GvjTUa0i2DVQZKrklQQlU,2416
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml,sha256=jbJqqciORJQknpSzh2zKiFm6VKDOsmaSk9XfPCVmHGg,1220
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml,sha256=q2_E2R1wIOdxd-AF-wjXkPO64gJgD27YXsZ8FFLWUIo,1607
@@ -774,11 +794,12 @@ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml,sha256=2AqMiNCRRunLIrssHvFzu1lUzOaQn8uOHM9yjrQq-_A,109
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=DNm1LRlQS9KbukEl6oEZzWLizyaOBcYZ2r7L8ZQtnJc,434
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=EjN3Pu1F_7EuZrk-geyL4qohqJ5-F2UFjWjj2V57ju0,433
+fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=45kSz44pcjTDCL3dnEECRMnN0kIaoWKUFZMFy5JJIyw,416
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
-fusion_bench-0.2.
-fusion_bench-0.2.
-fusion_bench-0.2.
-fusion_bench-0.2.
-fusion_bench-0.2.
+fusion_bench-0.2.15.dist-info/METADATA,sha256=abOyRl-ejl7CvLRCaRP20vn7rdb5OF92GxS_S9qTK3Q,21171
+fusion_bench-0.2.15.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+fusion_bench-0.2.15.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.15.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.15.dist-info/RECORD,,

fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml

@@ -1,9 +1,12 @@
-
+_target_: fusion_bench.method.smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm
+# device to put the models on
 device: cpu
+# device to perform SVD on
 accelerator: cuda
 # path to save/load the model
 model_path: null
-model_dtype:
+model_dtype: null
+# SmileMoE parameters
 num_experts_per_tok: 1
 rank_of_router: 8
 # if rank_of_expert < 0, dense expert is used.

fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml

@@ -0,0 +1,13 @@
+_target_: fusion_bench.method.smile_upscaling.smile_qwen2_upscaling.SmileQwen2UpscalingAlgorithm
+# device to put the models on
+device: cpu
+# device to perform SVD on
+accelerator: cuda
+# path to save/load the model
+model_path: null
+model_dtype: null
+# SmileMoE parameters
+num_experts_per_tok: 1
+rank_of_router: 8
+# if rank_of_expert < 0, dense expert is used.
+rank_of_expert: 64
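
For orientation, a sketch of how a `_target_`-style method config such as the one above can be materialized with Hydra's `instantiate`; the relative path and the assumption that the algorithm's constructor takes exactly these keys are mine, not from the diff. In a full run the config would normally be selected by name instead (for example `method=smile_upscaling/smile_qwen2_upscaling` together with modelpool and taskpool config groups).

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

# load the YAML shown above (path assumed relative to the repository root)
cfg = OmegaConf.load(
    "fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml"
)

# `_target_` names SmileQwen2UpscalingAlgorithm; the remaining keys
# (device, accelerator, rank_of_router, ...) become constructor kwargs
algorithm = instantiate(cfg)
```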

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B-Instruct.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.1-8B-Instruct
+  instruction: MergeBench/Llama-3.1-8B-Instruct_instruction
+  math: MergeBench/Llama-3.1-8B-Instruct_math
+  coding: MergeBench/Llama-3.1-8B-Instruct_coding
+  multilingual: MergeBench/Llama-3.1-8B-Instruct_multilingual
+  safety: MergeBench/Llama-3.1-8B-Instruct_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.1-8B-Instruct

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.1-8B
+  instruction: MergeBench/Llama-3.1-8B_instruction
+  math: MergeBench/Llama-3.1-8B_math
+  coding: MergeBench/Llama-3.1-8B_coding
+  multilingual: MergeBench/Llama-3.1-8B_multilingual
+  safety: MergeBench/Llama-3.1-8B_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.1-8B

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B-Instruct.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.2-3B-Instruct
+  instruction: MergeBench/Llama-3.2-3B-Instruct_instruction
+  math: MergeBench/Llama-3.2-3B-Instruct_math
+  coding: MergeBench/Llama-3.2-3B-Instruct_coding
+  multilingual: MergeBench/Llama-3.2-3B-Instruct_multilingual
+  safety: MergeBench/Llama-3.2-3B-Instruct_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.2-3B-Instruct

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.2-3B
+  instruction: MergeBench/Llama-3.2-3B_instruction
+  math: MergeBench/Llama-3.2-3B_math
+  coding: MergeBench/Llama-3.2-3B_coding
+  multilingual: MergeBench/Llama-3.2-3B_multilingual
+  safety: MergeBench/Llama-3.2-3B_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.2-3B

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-2b-it
+  instruction: MergeBench/gemma-2-2b-it_instruction
+  math: MergeBench/gemma-2-2b-it_math
+  coding: MergeBench/gemma-2-2b-it_coding
+  multilingual: MergeBench/gemma-2-2b-it_multilingual
+  safety: MergeBench/gemma-2-2b-it_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-2b-it

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-2b
+  instruction: MergeBench/gemma-2-2b_instruction
+  math: MergeBench/gemma-2-2b_math
+  coding: MergeBench/gemma-2-2b_coding
+  multilingual: MergeBench/gemma-2-2b_multilingual
+  safety: MergeBench/gemma-2-2b_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-2b

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-9b-it
+  instruction: MergeBench/gemma-2-9b-it_instruction
+  math: MergeBench/gemma-2-9b-it_math
+  coding: MergeBench/gemma-2-9b-it_coding
+  multilingual: MergeBench/gemma-2-9b-it_multilingual
+  safety: MergeBench/gemma-2-9b-it_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-9b-it

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-9b
+  instruction: MergeBench/gemma-2-9b_instruction
+  math: MergeBench/gemma-2-9b_math
+  coding: MergeBench/gemma-2-9b_coding
+  multilingual: MergeBench/gemma-2-9b_multilingual
+  safety: MergeBench/gemma-2-9b_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-9b
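
These pool configs are what a merging algorithm consumes. Below is a hedged sketch of a minimal merge over one of the MergeBench pools, using simple weight averaging as a stand-in for whichever method is actually under study; `SimpleAverageAlgorithm` and the `run(modelpool)` call follow the usual FusionBench programmatic interface, and the config path is assumed relative to the repository root.

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

from fusion_bench.method import SimpleAverageAlgorithm

cfg = OmegaConf.load(
    "fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml"
)
modelpool = instantiate(cfg)  # CausalLMPool over the five MergeBench experts

# merge the expert checkpoints into a single model by parameter averaging
merged_model = SimpleAverageAlgorithm().run(modelpool)
```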

fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml

@@ -0,0 +1,17 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+_recursive_: false
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: Qwen/Qwen2.5-1.5B
+  expert_1:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: Qwen/Qwen2.5-Math-1.5B
+  expert_2:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: Qwen/Qwen2.5-1.5B
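
Unlike the MergeBench pools above, this config spells out a per-model `_target_` and sets `_recursive_: false`, so the pool receives the raw sub-configs and can load each entry lazily. A sketch of pulling individual entries out of the pool; the `load_model`/`load_tokenizer` names follow the usual modelpool interface but are assumptions here.

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load(
    "fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml"
)
modelpool = instantiate(cfg)

math_expert = modelpool.load_model("expert_1")  # Qwen/Qwen2.5-Math-1.5B
r1_expert = modelpool.load_model("expert_2")    # DeepSeek-R1-Distill-Qwen-1.5B
tokenizer = modelpool.load_tokenizer()          # Qwen/Qwen2.5-1.5B tokenizer
```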

fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml

@@ -0,0 +1,12 @@
+_target_: fusion_bench.taskpool.LMEvalHarnessTaskPool
+
+tasks:
+  - truthfulqa
+batch_size: 1
+verbosity: null
+include_path: null
+apply_chat_template: false
+# if `output_path` is not given, the results will be saved to `log_dir/lm_eval_results`, where `log_dir` is the directory controlled by lightning Fabric.
+output_path: null
+# if `log_samples` is true, the samples will be saved to `output_path`.
+log_samples: false

{fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/entry_points.txt: file without changes
{fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/licenses/LICENSE: file without changes
{fusion_bench-0.2.13.dist-info → fusion_bench-0.2.15.dist-info}/top_level.txt: file without changes