fusion-bench 0.2.13__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff compares two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
Files changed (27)
  1. fusion_bench/method/ada_svd/clip_vision.py +4 -1
  2. fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +46 -145
  3. fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +229 -0
  4. fusion_bench/method/smile_upscaling/smile_upscaling.py +6 -336
  5. fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py +2 -203
  6. fusion_bench/models/modeling_smile_qwen2/__init__.py +8 -0
  7. fusion_bench/models/modeling_smile_qwen2/configuration_smile_qwen2.py +21 -0
  8. fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +922 -0
  9. fusion_bench/models/modeling_smile_qwen2/register.py +11 -0
  10. fusion_bench/models/rankone_moe.py +2 -88
  11. fusion_bench/models/smile_moe/linear_from_hf_config.py +373 -0
  12. fusion_bench/models/smile_moe/{linear.py → linear_from_module.py} +103 -33
  13. fusion_bench/models/smile_moe/utils/__init__.py +24 -0
  14. fusion_bench/models/smile_moe/utils/svd_utils.py +46 -0
  15. fusion_bench/taskpool/__init__.py +2 -0
  16. fusion_bench/taskpool/lm_eval_harness/__init__.py +3 -0
  17. fusion_bench/taskpool/lm_eval_harness/taskpool.py +87 -0
  18. {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.14.dist-info}/METADATA +22 -2
  19. {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.14.dist-info}/RECORD +27 -14
  20. {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.14.dist-info}/WHEEL +1 -1
  21. fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -2
  22. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +13 -0
  23. fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +17 -0
  24. fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml +12 -0
  25. {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.14.dist-info}/entry_points.txt +0 -0
  26. {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.14.dist-info}/licenses/LICENSE +0 -0
  27. {fusion_bench-0.2.13.dist-info → fusion_bench-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,24 @@
+from typing import List
+
+import torch
+from torch import Tensor
+
+from .svd_utils import svd
+
+__all__ = ["svd_utils", "_is_all_zeros"]
+
+
+def _is_all_zeros(tensor: Tensor | List[Tensor]) -> bool:
+    """
+    Check if a tensor or a list of tensors are all zeros.
+
+    Args:
+        tensor (Tensor | List[Tensor]): A tensor or a list of tensors.
+
+    Returns:
+        bool: True if all elements are zeros, False otherwise.
+    """
+    if isinstance(tensor, Tensor):
+        return torch.allclose(tensor, torch.zeros_like(tensor))
+    else:
+        return all(_is_all_zeros(t) for t in tensor)
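The new `smile_moe/utils` package centralizes this zero check; the SMILE upscaling code can use it, e.g., to skip modules whose fine-tuned weights are identical to the pretrained ones (an all-zero weight delta). A minimal usage sketch — the import path follows the RECORD entries below, and the tensors are made up for illustration:

```python
import torch

from fusion_bench.models.smile_moe.utils import _is_all_zeros

w = torch.zeros(4, 4)
assert _is_all_zeros(w)                       # a single all-zero tensor
assert _is_all_zeros([w, torch.zeros(2)])     # recurses over a list of tensors
assert not _is_all_zeros([w, torch.ones(2)])  # any nonzero entry fails the check
```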
@@ -0,0 +1,46 @@
+from typing import Optional, Tuple, Union
+
+import torch
+from torch import Tensor
+
+
+def _svd(w: Tensor, full_matrices: bool = True) -> Tuple[Tensor, Tensor, Tensor]:
+    """
+    Perform Singular Value Decomposition (SVD) on a tensor.
+
+    Args:
+        w (Tensor): The input tensor.
+        full_matrices (bool): Whether to compute the full-sized U and V matrices.
+
+    Returns:
+        Tuple[Tensor, Tensor, Tensor]: The U, S, and V matrices from SVD.
+    """
+    u, s, vh = torch.linalg.svd(
+        w, full_matrices=full_matrices, driver="gesvd" if w.is_cuda else None
+    )
+    v = vh.T
+    return u, s, v
+
+
+def svd(
+    w: Tensor,
+    full_matrices: bool = True,
+    accelerator: Optional[Union[torch.device, str]] = None,
+) -> Tuple[Tensor, Tensor, Tensor]:
+    """
+    Perform SVD on a tensor, optionally using a specified accelerator.
+
+    Args:
+        w (Tensor): The input tensor.
+        full_matrices (bool): Whether to compute the full-sized U and V matrices.
+        accelerator (Optional[Union[torch.device, str]]): The device to perform the computation on.
+
+    Returns:
+        Tuple[Tensor, Tensor, Tensor]: The U, S, and V matrices from SVD.
+    """
+    if accelerator is None:
+        return _svd(w, full_matrices=full_matrices)
+    original_device = w.device
+    w = w.to(accelerator)
+    u, s, v = _svd(w, full_matrices=full_matrices)
+    return u.to(original_device), s.to(original_device), v.to(original_device)
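`svd` wraps `torch.linalg.svd` so callers can offload the decomposition to a faster device and still receive the factors on the input tensor's original device (the `gesvd` driver is requested only for CUDA inputs, since `driver` is a CUDA-only argument). A usage sketch with an arbitrary weight matrix:

```python
import torch

from fusion_bench.models.smile_moe.utils.svd_utils import svd

w = torch.randn(256, 128)  # e.g., a linear layer's weight
accelerator = "cuda" if torch.cuda.is_available() else None

# The decomposition runs on `accelerator`; u, s, v come back on w's device.
u, s, v = svd(w, full_matrices=False, accelerator=accelerator)
assert u.device == w.device
# Economy-size factors reconstruct the input: w ≈ u @ diag(s) @ vᵀ
assert torch.allclose(u @ torch.diag(s) @ v.T, w, atol=1e-3)
```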
@@ -15,6 +15,7 @@ _import_structure = {
     "dummy": ["DummyTaskPool"],
     "gpt2_text_classification": ["GPT2TextClassificationTaskPool"],
     "llama": ["LlamaTestGenerationTaskPool"],
+    "lm_eval_harness": ["LMEvalHarnessTaskPool"],
     "nyuv2_taskpool": ["NYUv2TaskPool"],
     "openclip_vision": ["OpenCLIPVisionModelTaskPool"],
 }
@@ -30,6 +31,7 @@ if TYPE_CHECKING:
     from .dummy import DummyTaskPool
     from .gpt2_text_classification import GPT2TextClassificationTaskPool
     from .llama import LlamaTestGenerationTaskPool
+    from .lm_eval_harness import LMEvalHarnessTaskPool
     from .nyuv2_taskpool import NYUv2TaskPool
     from .openclip_vision import OpenCLIPVisionModelTaskPool
 
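`fusion_bench.taskpool` uses the lazy `_import_structure` pattern, so registering the new task pool here makes it importable from the package root without importing the heavy `lm_eval` dependency until the attribute is actually accessed. Both of these resolve to the same class:

```python
# Lazy: resolved through _import_structure on first attribute access.
from fusion_bench.taskpool import LMEvalHarnessTaskPool

# Eager: imports the submodule (and its lm_eval dependency) immediately.
from fusion_bench.taskpool.lm_eval_harness import LMEvalHarnessTaskPool
```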
@@ -0,0 +1,3 @@
+from .taskpool import LMEvalHarnessTaskPool
+
+__all__ = ["LMEvalHarnessTaskPool"]
@@ -0,0 +1,87 @@
+import logging
+import os
+from typing import List, Literal, Optional, Union, TYPE_CHECKING
+
+import lightning.fabric
+import lm_eval
+import lm_eval.models
+from lm_eval.__main__ import check_argument_types, cli_evaluate, setup_parser
+from omegaconf import DictConfig, ListConfig
+
+from fusion_bench import BaseTaskPool
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.utils.strenum import _version
+
+
+log = logging.getLogger(__name__)
+
+
+class LMEvalHarnessTaskPool(BaseTaskPool, LightningFabricMixin):
+    def __init__(
+        self,
+        tasks: Union[str, List[str]],
+        apply_chat_template: bool = False,
+        include_path: Optional[str] = None,
+        batch_size: int = 1,
+        metadata: Optional[DictConfig] = None,
+        verbosity: Optional[
+            Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
+        ] = None,
+        output_path: Optional[str] = None,
+        log_samples: bool = False,
+        _usage_: Optional[str] = None,
+        _version_: Optional[str] = None,
+        **kwargs,
+    ):
+        super().__init__(_usage_=_usage_, _version_=_version_)
+        self.tasks = tasks
+        self.include_path = include_path
+        self.batch_size = batch_size
+        self.metadata = metadata
+        self.apply_chat_template = apply_chat_template
+        self.verbosity = verbosity
+        self.kwargs = kwargs
+        self.output_path = output_path
+        self.log_samples = log_samples
+
+    def evaluate(self, model, *command_line_args, **kwargs):
+        command_line_args = []
+        if self.include_path is not None:
+            command_line_args.extend(["--include_path", self.include_path])
+        if isinstance(self.tasks, (list, ListConfig)):
+            command_line_args.extend(["--tasks", ",".join(self.tasks)])
+        elif isinstance(self.tasks, str):
+            command_line_args.extend(["--tasks", self.tasks])
+        if self.apply_chat_template:
+            command_line_args.extend(
+                ["--apply_chat_template", str(self.apply_chat_template)]
+            )
+        if self.batch_size is not None:
+            command_line_args.extend(["--batch_size", str(self.batch_size)])
+        if self.verbosity is not None:
+            command_line_args.extend(["--verbosity", str(self.verbosity)])
+        if self.metadata is not None:
+            command_line_args.extend(["--metadata", str(self.metadata)])
+        if self.output_path is None:
+            command_line_args.extend(
+                [
+                    "--output_path",
+                    os.path.join(self.log_dir, "lm_eval_results"),
+                ]
+            )
+        else:
+            command_line_args.extend(["--output_path", self.output_path])
+        if self.log_samples:
+            command_line_args.extend(["--log_samples"])
+        for key, value in kwargs.items():
+            command_line_args.extend([f"--{key}", str(value)])
+
+        parser = setup_parser()
+        check_argument_types(parser)
+        args = parser.parse_args(args=command_line_args)
+        log.info("LM-Eval Harness arguments:\n%s", args)
+
+        if not lightning.fabric.is_wrapped(model):
+            model = self.fabric.setup(model)
+        args.model = lm_eval.models.huggingface.HFLM(pretrained=model)
+        cli_evaluate(args)
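The task pool translates its constructor fields into `lm-eval` CLI flags, wraps the Fabric-prepared model in `lm_eval.models.huggingface.HFLM`, and hands off to `cli_evaluate`. A direct-construction sketch — the model and batch size are illustrative, and `lm-eval` must be installed (e.g., via the new `lm-eval-harness` extra):

```python
from transformers import AutoModelForCausalLM

from fusion_bench.taskpool.lm_eval_harness import LMEvalHarnessTaskPool

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B")
taskpool = LMEvalHarnessTaskPool(tasks="truthfulqa", batch_size=8)
# Roughly: lm_eval --tasks truthfulqa --batch_size 8, with the model
# passed in-process instead of loaded from a checkpoint path.
taskpool.evaluate(model)
```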
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.13
+Version: 0.2.14
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -45,6 +45,8 @@ Requires-Dist: rich
 Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
+Provides-Extra: lm-eval-harness
+Requires-Dist: lm-eval; extra == "lm-eval-harness"
 Dynamic: license-file
 
 <div align='center'>
@@ -122,7 +124,7 @@ Merging multiple expert models offers a promising approach for performing multi-
 
 ## Installation
 
-install from PyPI:
+Install from PyPI:
 
 ```bash
 pip install fusion-bench
@@ -137,6 +139,24 @@ cd fusion_bench
 pip install -e . # install the package in editable mode
 ```
 
+### Install with [Language Model Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
+
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10256836.svg)](https://doi.org/10.5281/zenodo.10256836)
+
+
+```bash
+pip install "fusion-bench[lm-eval-harness]"
+```
+
+or install from a local directory:
+
+```bash
+pip install -e ".[lm-eval-harness]"
+```
+
+This will install the latest version of fusion-bench and the dependencies required for the LM-Eval Harness.
+Documentation for using the LM-Eval Harness within the FusionBench framework can be found in the [online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
+
 ## Introduction to Deep Model Fusion
 
 Deep model fusion is a technique that merges, ensembles, or fuses multiple deep neural networks to obtain a unified model.
@@ -49,7 +49,7 @@ fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAc
 fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
 fusion_bench/method/simple_average.py,sha256=2ghcL1E-eLbIYDCHYCoR9WtiYSb1GvFAH163OTTTEEI,4481
 fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
-fusion_bench/method/ada_svd/clip_vision.py,sha256=QrT6cSwgVEGxXEpVhkvKQVQaoRW5P9V52Y3_8NX0f-o,12556
+fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
 fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
 fusion_bench/method/adamerging/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
 fusion_bench/method/adamerging/clip_task_wise_adamerging.py,sha256=rREVf8SKlQ9SiWdUWOYo91b1RW9PnNJxsW8MxHs_MUo,6095
@@ -157,8 +157,9 @@ fusion_bench/method/slerp/slerp.py,sha256=2_n10REnRoV5DuwCC0bDX8RM3MLL4Q_5rZiU0h
 fusion_bench/method/slerp/slerp_utils.py,sha256=vksRo6n7FqY7By9aqbwTL4XV3BjcU_GrUl_r85Kpfjc,3504
 fusion_bench/method/smile_upscaling/__init__.py,sha256=6ZpUSHUFVsT1U7V3TIDWBFqcHte7SjHW0wp6CAE8NVg,165
 fusion_bench/method/smile_upscaling/singular_projection_merging.py,sha256=0neZS9oZnl64wu1xb9ruGB7lbhYXyy4zj8l3E1QYRGQ,6670
-fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py,sha256=C2WEzBcAyrgO1FpFgg3pdrPjCkeGK1PKr66XQN1WJGE,11945
-fusion_bench/method/smile_upscaling/smile_upscaling.py,sha256=UXmFJ2s3qb7-5hsMUbQdtkhCRx4Gh8UCiost09JyVqY,20299
+fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py,sha256=T1M4hTRgIfDhy4zSccREPLajgMcdnohr0NtdWXtPzmA,8802
+fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py,sha256=gRo5TKhcFhclKtpI75ixc65YrhoW3kyVr_q-JrCN8n8,8713
+fusion_bench/method/smile_upscaling/smile_upscaling.py,sha256=epkurTJQdO2TlWf1v5qUSiF1Pi1Umz-uQDtHM_iX80c,8978
 fusion_bench/method/sparse_we_moe/__init__.py,sha256=V5VOpLwn6ZpsM09TmwFhhlJwMTBFXF7NE1-gW1MlAfc,133
 fusion_bench/method/sparse_we_moe/sparse_clip_we_moe.py,sha256=J8iVYks-SQ93dqh6FUQACvSmM364QHlVBYMKOCPbHrU,10288
 fusion_bench/method/sparse_we_moe/sparse_we_moe.py,sha256=6OYgj_D_4xTtqy_guA7whQu76LQ7gv-U2cIZkXe7bIg,10479
@@ -231,7 +232,7 @@ fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=sIK
 fusion_bench/models/__init__.py,sha256=TNOEH_2yAQP51m9mdWepNEf9VGUZgDthtgXbs4rhb4M,100
 fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
 fusion_bench/models/parameter_dict.py,sha256=lkVaK6xInqHoQ3_N6zx8CNKH4dnf8AP8H9xAY6ds6lg,3515
-fusion_bench/models/rankone_moe.py,sha256=uwpAqk1cwxxprQ0hxuAwRuPvHDxxBKBDahd9vcaafXs,14248
+fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
 fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
 fusion_bench/models/sparse_we_moe.py,sha256=b-yIeCsl2rz0i7BP9g_fqCEam7KUNjNX_J8oyZV6MJ8,16509
 fusion_bench/models/utils.py,sha256=AQFI2UZSItKfJpG8ex74FPjn_SjsADLhvpv1GYqu43U,2065
@@ -261,8 +262,12 @@ fusion_bench/models/modeling_losparse_llama/register.py,sha256=rRTvc5hK9GvTiEZGq
 fusion_bench/models/modeling_losparse_llama/utils.py,sha256=REQIGeJeNhlKxm2Y7EUumtzj4vdTQQIT1dSiB22_i0o,1886
 fusion_bench/models/modeling_smile_mistral/__init__.py,sha256=q9DmZhBYwTOUsaWOs--tMZ-9zYpAa8KYEJPoNT7IZj0,1171
 fusion_bench/models/modeling_smile_mistral/configuration_smile_mistral.py,sha256=yt1-JBlkJmlJw7dvB4_V8M0gy5ihD8isDxcmwyW85d4,633
-fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py,sha256=FxTOryDgi8z5W13ry42xsru_nWx0tGOpZuL-ufPuxoA,39897
+fusion_bench/models/modeling_smile_mistral/modeling_smile_mistral.py,sha256=5cN1M_XhcFCoJb8yvO1KCwHD_UH__hJg_X2D9C85R34,33128
 fusion_bench/models/modeling_smile_mistral/register.py,sha256=7nSJC4FveUi78rp53Ps6TcPGedHZ79cikYM5GIfEZfw,400
+fusion_bench/models/modeling_smile_qwen2/__init__.py,sha256=eaf9PRQ-rZRHzL2odWpR6ha3sR0rNMylrf0ZvqGTvbU,241
+fusion_bench/models/modeling_smile_qwen2/configuration_smile_qwen2.py,sha256=aekcpLcUGo4e7GkOtaxKClpIU5byyY-LQNDb-sMeyNc,621
+fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py,sha256=hfn2Tmu4h8t3xiKvUpbXpjcir-aywn7rvvsQK7vLwVA,37247
+fusion_bench/models/modeling_smile_qwen2/register.py,sha256=-6XqWWrUltFkJ76C6NCveJp4vPcqAoijJ3fItmDFsQs,391
 fusion_bench/models/nyuv2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/models/nyuv2/aspp.py,sha256=Nl-Kx9YmGp0BNpDedo9cYbynOwI4SUyILWN2VgiPDIc,2495
 fusion_bench/models/nyuv2/lightning_module.py,sha256=SLtC0yL6455uKeb-o07MR6v-xE4BTKm7B0E2ayQwEBU,5436
@@ -273,7 +278,10 @@ fusion_bench/models/open_clip/modeling.py,sha256=34wKcbxe5xb6fzAVdIz0QcsSXs-8FQF
 fusion_bench/models/open_clip/utils.py,sha256=YM_vGQSxIDoB2euHG54hhRGIcINJfR0NxNT5U42KRCw,10394
 fusion_bench/models/open_clip/variables_and_paths.py,sha256=_OBcKvZwSGvYSmgKtXOuekEJI-btW94Ia-BQ9n4isfY,1231
 fusion_bench/models/smile_moe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/models/smile_moe/linear.py,sha256=voFvx4Nnfgc6YReBcY9FUGG3WrxVRjyD3odX4jIS5Eg,8724
+fusion_bench/models/smile_moe/linear_from_hf_config.py,sha256=4vzYYjDHGOf1IO7gO0dzQC1xqcwEij9M7d4tVZm-7dY,11919
+fusion_bench/models/smile_moe/linear_from_module.py,sha256=Sv6rCj1FWr7wQhv2hO2waJIkoHly7HZCP4zbYn3Dk78,10815
+fusion_bench/models/smile_moe/utils/__init__.py,sha256=_FfU5HAQIwl9Ch8M8_VBxLRtvqk0lWr-k1WVfWg61jA,588
+fusion_bench/models/smile_moe/utils/svd_utils.py,sha256=A2u7lH5Bo2qhgwplHPAz56pdbHYUunk2PS6PSvTn19M,1407
 fusion_bench/models/surgery/__init__.py,sha256=tcUSi2m9GzGWfvRDQScIbdEbFBS_35gm9zGKN7VpE70,53
 fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNkEiPwNrN11ydyQQ1EZHehM,5133
 fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -298,7 +306,7 @@ fusion_bench/scripts/nyuv2_mtl_train.py,sha256=W1C45R9NdF4O-UjCx1bUxRTdFE0-FlRpw
 fusion_bench/scripts/webui.py,sha256=ryA-2leSnHcYA88tTAYzJGDhiljbi0vl1Fibejzndlw,14398
 fusion_bench/scripts/clip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/scripts/clip/convert_checkpoint.py,sha256=zncgRAhInFpJDSHIm3GO4F6BzgsdAQVj3LLmV7g-JiQ,1221
-fusion_bench/taskpool/__init__.py,sha256=1AFQhYDmrev3HZqWgSPz_CfygVGOYP9D8nY_q7QS9nk,1207
+fusion_bench/taskpool/__init__.py,sha256=-ltXMsS3jeGxa9vnhOyrbITOUtfNjLwkGPfS2mKDOdY,1312
 fusion_bench/taskpool/base_pool.py,sha256=Cbe3ZgJ34DWSDZeZEjlgqR0b84aM1i68D9-vomaooo8,852
 fusion_bench/taskpool/dummy.py,sha256=Di9JZO3XyDYn6wAGukrJMTnkS_NaxGTeQYo_3j1JD3Y,1675
 fusion_bench/taskpool/gpt2_text_classification.py,sha256=PCNdc2SNGUFGxJ0snmwrnjTdSwmDt9fs7Pe0eDjdvaw,6091
@@ -313,6 +321,8 @@ fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py,sha256=LY9wxWC
 fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
 fusion_bench/taskpool/llama/reward_model.py,sha256=ZpRSX4esBAuE0MdTjPHjqS2TnvGb6P8arOGxBeXnq6Y,5028
 fusion_bench/taskpool/llama/test_generation.py,sha256=kJ_5GruG12FsuJHDh_S7pbQgwEojTqhGpA_wVNH5KPc,6675
+fusion_bench/taskpool/lm_eval_harness/__init__.py,sha256=_usNxe4z9avClSWjwHMxoznnI_UQFMuo7uOEJhP8jMk,81
+fusion_bench/taskpool/lm_eval_harness/taskpool.py,sha256=2eLP4FqVxtIbYe66R1ksNGpt94QOFHmH8C9UgxlWnJ8,3280
 fusion_bench/taskpool/openclip_vision/__init__.py,sha256=02p77Mb1JE7trrv2HtIku5X667WY5LZ2zVuyL3uIcyo,59
 fusion_bench/taskpool/openclip_vision/openclip_taskpool.py,sha256=PtD_Y9CWzPI3WEil_RuXtCh8ImPKcSHtZTqfybmsGdg,6875
 fusion_bench/tasks/__init__.py,sha256=Z_ePIp4Xizkj78QopLg1dZkJAN_IF73MkbR_nkfHQ9Y,52
@@ -383,7 +393,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
 fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
 fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
 fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
-fusion_bench-0.2.13.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
+fusion_bench-0.2.14.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
 fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
 fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
 fusion_bench_config/fabric_model_fusion.yaml,sha256=5iPgaM8UOhuvBW2Hap_csst-eqlYRwb_lru8ngjrZ_g,948
@@ -555,7 +565,8 @@ fusion_bench_config/method/regmean/gpt2_regmean.yaml,sha256=n94aTboDdwSA7Tki8l_o
 fusion_bench_config/method/regmean/regmean.yaml,sha256=ZgVVLx-lHwVgjtjTl4VZUlthh8yyua87QvoJfmNHud4,101
 fusion_bench_config/method/slerp/slerp.yaml,sha256=xldDUULtfCdwzAkQUb0C8-TmbW7FqcAlIOsPX8p4n6w,116
 fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml,sha256=ZMn_ImRjjc2uozf7ocQIzbgvFDpBV7S-34KptbBXVGo,200
-fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml,sha256=cah2cwaSfdwNc5AgY3soDrBHFyIOKRC5UdMPqUr-X7g,236
+fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml,sha256=VFMrkbO69d0wCjTQCuKysYGVe6hEwNu792g1QkhU5Mk,383
+fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml,sha256=Rdcub7yFFn-jKXrlFoj8LQk1cRbJm2do91pV-YMSzTE,378
 fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml,sha256=G88mabTTniDUtiUC9Vg3cj_sw6D05mE4_ZdyYI4Omjk,477
 fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml,sha256=1zfrT2FNmLyCivth4kzGR8Ai9jyQ87OXRbf4di4IE94,642
 fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml,sha256=UPnMt_GoMSHOeOx5Sv0oHPRoPhhvVRC5zdVA38OTwSg,636
@@ -715,6 +726,7 @@ fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk
 fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=H3UwSk4ChsGSrH49LuttxldFURW-4RVUtnIa0ClHKXo,802
 fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml,sha256=vU0q06OUa1UM_Xvp2t27Rl3F6EDgYWPnDxeyzUH-QVI,589
 fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml,sha256=MpgshGtmMXpUFRA1knjdGRVH4UgZbkkcTmCTrF3LlZk,573
+fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml,sha256=Kbpam1Hds5URMP35dXGdVibH-vTmYPh3xHMkhj6Mgtg,648
 fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml,sha256=FynhZ1PRvyzsyzrHIuMpGgQGRMlu_xI7earm-CeIVeY,824
 fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml,sha256=zQWfp7mYm6jQ8g41Eeh2d9vAbocZJ5btPX1ft9QpEZU,546
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md,sha256=DC0HF-isCHshipHTC0Rof6GvjTUa0i2DVQZKrklQQlU,2416
@@ -774,11 +786,12 @@ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml,sha256=2AqMiNCRRunLIrssHvFzu1lUzOaQn8uOHM9yjrQq-_A,109
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=DNm1LRlQS9KbukEl6oEZzWLizyaOBcYZ2r7L8ZQtnJc,434
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=EjN3Pu1F_7EuZrk-geyL4qohqJ5-F2UFjWjj2V57ju0,433
+fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=45kSz44pcjTDCL3dnEECRMnN0kIaoWKUFZMFy5JJIyw,416
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
-fusion_bench-0.2.13.dist-info/METADATA,sha256=RB7s0Jppl2mX5evPosbdXONlF__hlFeZf8dF4Qf36tQ,20085
-fusion_bench-0.2.13.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-fusion_bench-0.2.13.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.13.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.13.dist-info/RECORD,,
+fusion_bench-0.2.14.dist-info/METADATA,sha256=X13MPJ_FA0D5Gc5T-CvbcYOK03QtTiyIHnDNbI7_aOo,20904
+fusion_bench-0.2.14.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+fusion_bench-0.2.14.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.14.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.14.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.7.1)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
@@ -1,9 +1,12 @@
-name: smile_mistral_upscaling
+_target_: fusion_bench.method.smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm
+# device to put the models on
 device: cpu
+# device to perform SVD on
 accelerator: cuda
 # path to save/load the model
 model_path: null
-model_dtype: float16
+model_dtype: null
+# SmileMoE parameters
 num_experts_per_tok: 1
 rank_of_router: 8
 # if rank_of_expert < 0, dense expert is used.
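Replacing the old `name:` key with `_target_:` makes these method configs directly instantiable with Hydra rather than resolved through a name registry. A sketch of what this enables, assuming the algorithm class accepts the remaining keys as constructor arguments (the same pattern applies to the new Qwen2 config below):

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load(
    "fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml"
)
# Builds SmileMistralUpscalingAlgorithm(device="cpu", accelerator="cuda", ...)
algorithm = instantiate(cfg)
```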
@@ -0,0 +1,13 @@
+_target_: fusion_bench.method.smile_upscaling.smile_qwen2_upscaling.SmileQwen2UpscalingAlgorithm
+# device to put the models on
+device: cpu
+# device to perform SVD on
+accelerator: cuda
+# path to save/load the model
+model_path: null
+model_dtype: null
+# SmileMoE parameters
+num_experts_per_tok: 1
+rank_of_router: 8
+# if rank_of_expert < 0, dense expert is used.
+rank_of_expert: 64
@@ -0,0 +1,17 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+_recursive_: false
+models:
+  _pretrained_:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: Qwen/Qwen2.5-1.5B
+  expert_1:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: Qwen/Qwen2.5-Math-1.5B
+  expert_2:
+    _target_: transformers.AutoModelForCausalLM.from_pretrained
+    pretrained_model_name_or_path: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: Qwen/Qwen2.5-1.5B
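This model pool pairs the pretrained Qwen2.5-1.5B base with two fine-tuned experts for SMILE upscaling; `_recursive_: false` keeps Hydra from eagerly instantiating the three `from_pretrained` entries so the pool can load them on demand. A sketch of the expected access pattern, assuming `CausalLMPool`'s `load_model`/`load_tokenizer` accessors:

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf

cfg = OmegaConf.load(
    "fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml"
)
modelpool = instantiate(cfg)

base = modelpool.load_model("_pretrained_")     # Qwen/Qwen2.5-1.5B, bfloat16 per model_kwargs
math_expert = modelpool.load_model("expert_1")  # Qwen/Qwen2.5-Math-1.5B
tokenizer = modelpool.load_tokenizer()
```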
@@ -0,0 +1,12 @@
+_target_: fusion_bench.taskpool.LMEvalHarnessTaskPool
+
+tasks:
+  - truthfulqa
+batch_size: 1
+verbosity: null
+include_path: null
+apply_chat_template: false
+# if `output_path` is not given, the results will be saved to `log_dir/lm_eval_results`, where `log_dir` is the directory controlled by lightning Fabric.
+output_path: null
+# if `log_samples` is true, the samples will be saved to `output_path`.
+log_samples: false
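Instantiating this config yields the task pool shown earlier; together with the new method and model pool configs it closes the loop for evaluating an upscaled model with the LM-Eval Harness. A config-driven sketch — the merged model here is a stand-in for the output of a fusion algorithm:

```python
from hydra.utils import instantiate
from omegaconf import OmegaConf
from transformers import AutoModelForCausalLM

cfg = OmegaConf.load(
    "fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml"
)
taskpool = instantiate(cfg)  # LMEvalHarnessTaskPool(tasks=["truthfulqa"], ...)

merged_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B")
taskpool.evaluate(merged_model)  # results land in <log_dir>/lm_eval_results
```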