fusion-bench 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. fusion_bench/method/__init__.py +8 -0
  2. fusion_bench/method/ensemble.py +17 -2
  3. fusion_bench/method/linear/__init__.py +6 -2
  4. fusion_bench/method/linear/{simple_average_for_llama.py → simple_average_for_causallm.py} +8 -4
  5. fusion_bench/method/linear/{task_arithmetic_for_llama.py → task_arithmetic_for_causallm.py} +22 -12
  6. fusion_bench/method/linear/ties_merging_for_causallm.py +70 -0
  7. fusion_bench/method/simple_average.py +2 -2
  8. fusion_bench/method/task_arithmetic/task_arithmetic.py +35 -10
  9. fusion_bench/method/ties_merging/ties_merging.py +22 -6
  10. fusion_bench/method/wudi/__init__.py +1 -0
  11. fusion_bench/method/wudi/wudi.py +105 -0
  12. fusion_bench/mixins/lightning_fabric.py +4 -0
  13. fusion_bench/mixins/serialization.py +25 -78
  14. fusion_bench/modelpool/causal_lm/causal_lm.py +32 -10
  15. fusion_bench/models/hf_clip.py +4 -0
  16. fusion_bench/models/hf_utils.py +2 -1
  17. fusion_bench/models/model_card_templates/default.md +8 -1
  18. fusion_bench/models/wrappers/ensemble.py +136 -7
  19. fusion_bench/scripts/cli.py +2 -2
  20. fusion_bench/taskpool/clip_vision/taskpool.py +11 -4
  21. fusion_bench/utils/devices.py +30 -8
  22. fusion_bench/utils/lazy_state_dict.py +3 -0
  23. fusion_bench/utils/rich_utils.py +7 -3
  24. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/METADATA +10 -3
  25. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/RECORD +37 -30
  26. fusion_bench_config/method/ensemble/simple_ensemble.yaml +1 -0
  27. fusion_bench_config/method/linear/{simple_average_for_llama.yaml → simple_average_for_causallm.yaml} +1 -1
  28. fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +4 -0
  29. fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +13 -0
  30. fusion_bench_config/method/wudi/wudi.yaml +4 -0
  31. fusion_bench_config/modelpool/CausalLMPool/{Qwen2.5-1.5B_math_and_coder.yaml → Qwen2.5-1.5B_math_and_code.yaml} +1 -2
  32. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml +11 -0
  33. fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml +11 -0
  34. fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +0 -4
  35. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/WHEEL +0 -0
  36. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/entry_points.txt +0 -0
  37. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/licenses/LICENSE +0 -0
  38. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/top_level.txt +0 -0
@@ -39,7 +39,12 @@ def clear_cuda_cache():
39
39
  log.warning("CUDA is not available. No cache to clear.")
40
40
 
41
41
 
42
- def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
42
+ def to_device(
43
+ obj: T,
44
+ device: Optional[torch.device],
45
+ copy_on_move: bool = False,
46
+ **kwargs: Any,
47
+ ) -> T:
43
48
  """
44
49
  Move a given object to the specified device.
45
50
 
@@ -49,12 +54,20 @@ def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
49
54
  Args:
50
55
  obj: The object to be moved to the device. This can be a torch.Tensor, torch.nn.Module, list, tuple, or dict.
51
56
  device (torch.device): The target device to move the object to. This can be `None`.
52
- **kwargs: Additional keyword arguments to be passed to the `to` method of torch.Tensor or torch.nn.Module. For example, `non_blocking=True`, `dtype=torch.float16`.
57
+ copy_on_move (bool, optional): Whether to force a copy operation when moving tensors to a different device.
58
+ If True, tensors will be copied when moved to a different device (copy=True is passed to tensor.to()).
59
+ If False (default), tensors are moved without forcing a copy operation, allowing PyTorch to optimize
60
+ the operation. This parameter only affects torch.Tensor objects; modules and other types are unaffected.
61
+ Defaults to False.
62
+ **kwargs: Additional keyword arguments to be passed to the `to` method of torch.Tensor or torch.nn.Module.
63
+ For example, `non_blocking=True`, `dtype=torch.float16`. Note that if `copy_on_move=True`, the `copy`
64
+ keyword argument will be automatically set and should not be provided manually.
53
65
 
54
66
  Returns:
55
67
  The object moved to the specified device. The type of the returned object matches the type of the input object.
56
68
 
57
69
  Examples:
70
+ ```python
58
71
  >>> tensor = torch.tensor([1, 2, 3])
59
72
  >>> to_device(tensor, torch.device('cuda'))
60
73
  tensor([1, 2, 3], device='cuda:0')
@@ -66,17 +79,26 @@ def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
66
79
  >>> data = [torch.tensor([1, 2]), torch.tensor([3, 4])]
67
80
  >>> to_device(data, torch.device('cuda'))
68
81
  [tensor([1, 2], device='cuda:0'), tensor([3, 4], device='cuda:0')]
82
+
83
+ >>> # Force copy when moving to different device
84
+ >>> tensor = torch.tensor([1, 2, 3], device='cpu')
85
+ >>> copied_tensor = to_device(tensor, torch.device('cuda'), copy_on_move=True)
86
+ >>> # tensor and copied_tensor will have different memory locations
87
+ ```
69
88
  """
70
- if isinstance(obj, (torch.Tensor, torch.nn.Module)):
89
+ if isinstance(obj, torch.Tensor):
90
+ if copy_on_move:
91
+ if obj.device != torch.device(device):
92
+ kwargs["copy"] = True
93
+ return obj.to(device, **kwargs)
94
+ elif isinstance(obj, torch.nn.Module):
71
95
  return obj.to(device, **kwargs)
72
96
  elif isinstance(obj, list):
73
- return [to_device(o, device) for o in obj]
97
+ return [to_device(o, device, **kwargs) for o in obj]
74
98
  elif isinstance(obj, tuple):
75
- return tuple(to_device(o, device) for o in obj)
99
+ return tuple(to_device(o, device, **kwargs) for o in obj)
76
100
  elif isinstance(obj, dict):
77
- for key in obj:
78
- obj[key] = to_device(obj[key], device)
79
- return obj
101
+ return {key: to_device(value, device, **kwargs) for key, value in obj.items()}
80
102
  else:
81
103
  # the default behavior is to return the object as is
82
104
  return obj
@@ -76,6 +76,9 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
76
76
  _index: Optional[Dict[str, str]]
77
77
  """Mapping of parameter names to checkpoint files."""
78
78
 
79
+ meta_module: TorchModelType = None
80
+ meta_module_class: Optional[Type[TorchModelType]] = None
81
+
79
82
  def __init__(
80
83
  self,
81
84
  checkpoint: str,
@@ -188,17 +188,21 @@ if __name__ == "__main__":
188
188
  display_available_styles()
189
189
 
190
190
 
191
- def setup_colorlogging(force=False, **config_kwargs):
191
+ def setup_colorlogging(
192
+ force=False,
193
+ level=logging.INFO,
194
+ **kwargs,
195
+ ):
192
196
  """
193
197
  Sets up color logging for the application.
194
198
  """
195
199
  FORMAT = "%(message)s"
196
200
 
197
201
  logging.basicConfig(
198
- level=logging.INFO,
202
+ level=level,
199
203
  format=FORMAT,
200
204
  datefmt="[%X]",
201
205
  handlers=[RichHandler()],
202
206
  force=force,
203
- **config_kwargs,
207
+ **kwargs,
204
208
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fusion_bench
3
- Version: 0.2.23
3
+ Version: 0.2.24
4
4
  Summary: A Comprehensive Benchmark of Deep Model Fusion
5
5
  Author-email: Anke Tang <tang.anke@foxmail.com>
6
6
  Project-URL: Repository, https://github.com/tanganke/fusion_bench
@@ -23,12 +23,19 @@ Requires-Dist: rich
23
23
  Requires-Dist: scipy
24
24
  Requires-Dist: h5py
25
25
  Requires-Dist: pytest
26
+ Requires-Dist: joblib
27
+ Requires-Dist: bidict
26
28
  Requires-Dist: transformers!=4.49
27
29
  Requires-Dist: pillow!=11.2.1
28
30
  Provides-Extra: lm-eval-harness
29
31
  Requires-Dist: lm-eval; extra == "lm-eval-harness"
30
32
  Requires-Dist: immutabledict; extra == "lm-eval-harness"
31
33
  Requires-Dist: langdetect; extra == "lm-eval-harness"
34
+ Requires-Dist: rich-run; extra == "lm-eval-harness"
35
+ Provides-Extra: docs
36
+ Requires-Dist: mkdocs; extra == "docs"
37
+ Requires-Dist: mkdocs-material; extra == "docs"
38
+ Requires-Dist: mkdocstrings[python]; extra == "docs"
32
39
  Dynamic: license-file
33
40
 
34
41
  <div align='center'>
@@ -151,7 +158,7 @@ This will install the latest version of fusion-bench and the dependencies requir
151
158
  Documentation for using LM-Eval Harness within FusionBench framework can be found at [this online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
152
159
 
153
160
  > [!TIP]
154
- > Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/causal_lm) or in the [`docs/modelpool/causal_lm.md`](docs/modelpool/causal_lm.md) markdown file.
161
+ > Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/llm) or in the [`docs/modelpool/llm/index.md`](docs/modelpool/llm/index.md) markdown file.
155
162
 
156
163
  ## Introduction to Deep Model Fusion
157
164
 
@@ -179,7 +186,7 @@ The project is structured as follows:
179
186
  - `taskpool`: configuration files for the task pool.
180
187
  - `model`: configuration files for the models.
181
188
  - `dataset`: configuration files for the datasets.
182
- - `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -r mkdocs-requirements.txt`.
189
+ - `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -e ".[docs]"`.
183
190
  - `examples/`: example scripts for running some of the experiments.
184
191
  > **naming convention**: `examples/{method_name}/` contains the files such as bash scripts and jupyter notebooks for the specific method.
185
192
  - `tests/`: unit tests for the benchmark.
@@ -48,12 +48,12 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
48
48
  fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
49
49
  fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
50
50
  fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- fusion_bench/method/__init__.py,sha256=MDYyNjJufoOe_iwmlL2ftWoD-72ReVv00mege5MQ6fc,8685
51
+ fusion_bench/method/__init__.py,sha256=-d5WMlvY3kHYSUeompoG71T6fSttXPDjPf6X4TxNkqY,8986
52
52
  fusion_bench/method/base_algorithm.py,sha256=OnKSNPQ_nIdIWxryyblW_sko7uoEBN4lGh-eLkJ4kh4,9004
53
53
  fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
54
- fusion_bench/method/ensemble.py,sha256=oGiTJUderoPP0Opd7nHwC6h3VBmGTQ5inuG3wb6F4-A,3097
54
+ fusion_bench/method/ensemble.py,sha256=Bjzqxt-tUp5cawT1jIhqKswN5QH3bkYbmuI4LS4uTG0,3619
55
55
  fusion_bench/method/model_recombination.py,sha256=b2ku5wCrWd1QSZscIra4KlhLDxt04JjU30ItMNvpZ6g,5268
56
- fusion_bench/method/simple_average.py,sha256=fLd14_0218JKyXmwe5M6kgumfD60u2ZVnm3B7PBX-Uc,5508
56
+ fusion_bench/method/simple_average.py,sha256=FuIwHCUNK5CoToBzVt-lo8SK7wjj8CdRpiNLRnAflH4,5519
57
57
  fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
58
58
  fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
59
59
  fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
@@ -128,12 +128,13 @@ fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4R
128
128
  fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
129
129
  fusion_bench/method/knots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
130
130
  fusion_bench/method/knots/knots_utils.py,sha256=NWlzo4nhQypUcNknU832MP3QT42VsLx-6WQ9QXuSigw,795
131
- fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
131
+ fusion_bench/method/linear/__init__.py,sha256=0U7JqlX8JuMECKlvLNM16Lxc1lCBN2bVqH8FtNoD-Fw,417
132
132
  fusion_bench/method/linear/expo.py,sha256=N7XnBTC0Nz_4gRs1f9TL9g-j-Lku5TF0lAjGKhZHwOw,3990
133
133
  fusion_bench/method/linear/linear_interpolation.py,sha256=Y01HPMBb7TaCjEBsbC6gqQyHvY1SRpwPyPPLxvYrL0s,2223
134
134
  fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
135
- fusion_bench/method/linear/simple_average_for_llama.py,sha256=5psacdQiqtUK_lwYZcXp9kgIU3MFGk6G1JatxeMUjE8,3339
136
- fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
135
+ fusion_bench/method/linear/simple_average_for_causallm.py,sha256=qc-JiPLu19442DcP0xCl4EDGzVnIbq3WGiAiWkNzv6E,3448
136
+ fusion_bench/method/linear/task_arithmetic_for_causallm.py,sha256=7cewnrjX47omokAdhNvDIQV8zz06_ZNKPWM7CZx30R0,2247
137
+ fusion_bench/method/linear/ties_merging_for_causallm.py,sha256=yi0RCC6eRwXMKUC_cBdFLvejia4nmjPh9Pd0MpaUrVg,2392
137
138
  fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
138
139
  fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=1nvjOMABuEISyYaTRrFiwHLWvSTgHT8pEzTYBTLBRUg,18779
139
140
  fusion_bench/method/lm_finetune/causal_lm_pretrain.py,sha256=4CL9KGFsUzrt-edMfTooo4G4apzTH_57rso3DGGvKL0,219
@@ -222,7 +223,7 @@ fusion_bench/method/tall_mask/__init__.py,sha256=XINPP8PqGQ01he9p2RyHaKGyrcYoJuY
222
223
  fusion_bench/method/tall_mask/task_arithmetic.py,sha256=c-5ehKV_t46ljvKTBDr-eA3-FbSD_UNXlza4cOqK5aI,4371
223
224
  fusion_bench/method/tall_mask/utils.py,sha256=Wlp8WcPwR_lCaBIZ9rgG6ewLfSzz3G7kPk9yj13pvls,8817
224
225
  fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
225
- fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=k4p8ADupDR5nZGHZjNgNsO8I_8rzqVyAr6Tejh85V0A,5525
226
+ fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=KsSBshf04MUwIjoc0HAAmY6cWMqjZwZOYXbUuU4EaL0,6320
226
227
  fusion_bench/method/task_singular_vector/TSVC.py,sha256=yn4SrZNvtA6PoGYJmbmtNeDyDbGnRCgfZ7ZCg914AZU,410
227
228
  fusion_bench/method/task_singular_vector/TSVM.py,sha256=Sdgoi8xT0Hl19pmGdIuUS3D1DsVqSVD-Hipp-Sj_HoA,13652
228
229
  fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
@@ -231,7 +232,7 @@ fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=WGM8wCICdGsN
231
232
  fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Mep62TnXJscBEFZ6QDsI28cWmfygt8EPwjQdfUJzEZQ,315
232
233
  fusion_bench/method/task_singular_vector/utils/task_singular_interference.py,sha256=tXsFwx8eomzu00nSp95CjjWZX82zq32ff2Q6VM_29CM,1348
233
234
  fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
234
- fusion_bench/method/ties_merging/ties_merging.py,sha256=eCpGa9F4VoT0zsl7XKK7WsKz45tu_DkFHeffyJospJc,5152
235
+ fusion_bench/method/ties_merging/ties_merging.py,sha256=u2o7Wo2SJJsxxhBeAhsmY7k4bdZkUtwAwGePGI4Sggc,5916
235
236
  fusion_bench/method/ties_merging/ties_merging_utils.py,sha256=EZyltS9hUM8NmcvXjAqhBpj-ucMlMtR95082kPDsJPU,10296
236
237
  fusion_bench/method/trust_region/__init__.py,sha256=4ao0E-jTlmTQPArbFWD_dFn_4yve3urNIuSMT8JtRIM,91
237
238
  fusion_bench/method/trust_region/clip_task_arithmetic.py,sha256=SWP7sRMiXzkDZ3KdNht3zqjaTcAtB4wpnnd8KYbcKZI,7441
@@ -245,6 +246,8 @@ fusion_bench/method/we_moe/we_moe.py,sha256=_QtmD04oFh7aLhmPq8EYchYB7BIN9ZFWOeys
245
246
  fusion_bench/method/weighted_average/__init__.py,sha256=bLxIuuB72hH05J_Spz4MZbiLpYL39iwgVIQa_QeQpIk,118
246
247
  fusion_bench/method/weighted_average/llama.py,sha256=vvxXp8v98kvXfHi7fYupnIrOVoA3tp08lmV2jDri_BY,3731
247
248
  fusion_bench/method/weighted_average/weighted_average.py,sha256=E4byEA2VfXozu7S_gnYVvwI3qg8AFWaSeNRHGbs2Tno,3340
249
+ fusion_bench/method/wudi/__init__.py,sha256=08qPzOlhjw-Ab8TwyY9MGOGx_TLrUTueJc1WgRIvuxU,44
250
+ fusion_bench/method/wudi/wudi.py,sha256=HL3Y0MPjozp7NML_UNjIWWPbQDQxYH_WG_BuyripeBQ,3602
248
251
  fusion_bench/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
252
  fusion_bench/metrics/continual_learning/__init__.py,sha256=f-mkv4SpXTq5kiQVHbe2g0IPf4yLFgu1Dw7g2DOK6T4,57
250
253
  fusion_bench/metrics/continual_learning/backward_transfer.py,sha256=LCMWFFmBgWv7UIAJqiTaSvVvanx4qjnXIGuCMYvzmtc,559
@@ -262,10 +265,10 @@ fusion_bench/mixins/__init__.py,sha256=yjRvcB9Mn-c0g8tXmoBf2Dn8gyc-Na6dyhc4r674a
262
265
  fusion_bench/mixins/clip_classification.py,sha256=8dqJuI3AVetFZKuzTp1SR2kGQ-vGvfbcmwfnzuUiwfI,10096
263
266
  fusion_bench/mixins/fabric_training.py,sha256=ZmycEhCaNCgVi5oM9m0q6msxgk3quowmFvDAcvskFrg,13017
264
267
  fusion_bench/mixins/hydra_config.py,sha256=rfT-XPUKV_U3nvuTVsKLmSmEiieoSIsbhxE5_-E0er0,5508
265
- fusion_bench/mixins/lightning_fabric.py,sha256=ns9H_dkSDD8jJ7GL4YcAypewUcy9mzbX3Xy0bBcyGVY,7403
268
+ fusion_bench/mixins/lightning_fabric.py,sha256=5iamAL7YV6lEm_-8NuzFjfIy1vslwKthSpCSWLLhlCM,7506
266
269
  fusion_bench/mixins/openclip_classification.py,sha256=O45HzgLXNvlQr5RVpfIGsYdIQ0tY5g_68KB0MTqsZWU,290
267
270
  fusion_bench/mixins/rich_live.py,sha256=j7wNgrgwfdpl6nCXZGF_2DLtNq2aqCb_52Qhe9QSltc,495
268
- fusion_bench/mixins/serialization.py,sha256=A2zEe3RIUhj60S8ENvjdMORz9zJ0bRnrAD54x1XIvao,15117
271
+ fusion_bench/mixins/serialization.py,sha256=z73Mmq952TIdPwwZ8cRdl3n0_uc9lqylFI9fxKesREs,13260
269
272
  fusion_bench/mixins/simple_profiler.py,sha256=czWMl6p9PoxbQ5A8Uifwleaq5QPGEn0qMc8MXu9dSZM,2200
270
273
  fusion_bench/mixins/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
271
274
  fusion_bench/mixins/optim/adamw_with_warmup.py,sha256=qTnRl8GVVIfaplOFBHnJFuZUbxPZRWRGHGNzm_EDhDE,1421
@@ -277,7 +280,7 @@ fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RP
277
280
  fusion_bench/modelpool/lazy_state_dict_pool.py,sha256=HtEA85rqSCHfsIddI5sKDcZf5kSuHNwrb8fF1TUSTr0,652
278
281
  fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
279
282
  fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
280
- fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=dSmjAhL4AxD34ckCdE8Rnf1hN5opoPIuz-hducQeK38,18685
283
+ fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=gpUQqxZIuKoaQ-gvdPsLVxI7UifueR6k3YzbUV1i0lk,19902
281
284
  fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
282
285
  fusion_bench/modelpool/clip_vision/modelpool.py,sha256=e5t9olRMOj_SyGVy-gqn7RwC5FAqxNsJDongWIv2KFY,7108
283
286
  fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
@@ -288,8 +291,8 @@ fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=_VB9nlR_gm6IEXNM
288
291
  fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
289
292
  fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=t9wXHFwa7V2XC3ajxt4_bSsxMTDKW4nebvdxhG7VeLM,3435
290
293
  fusion_bench/models/__init__.py,sha256=LeLQw2Yphu4QKZxjws_7MCM50XvFP1rTrvJ_2SR5zIA,271
291
- fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
292
- fusion_bench/models/hf_utils.py,sha256=ozS56t69BOGy_wvbjX6MKFUuGsfKqy6s_TsinldNetk,5435
294
+ fusion_bench/models/hf_clip.py,sha256=lL4LxbdwC_rDWRozdEJmRlzKaNcQMpWwCSMDE0tfZRM,7525
295
+ fusion_bench/models/hf_utils.py,sha256=bfB3QAUqsG-TyUeOWrZt8V7GeWDhp-fKg3P0J3D_TbQ,5497
293
296
  fusion_bench/models/parameter_dict.py,sha256=HCkTJCz23pYN1_Hhegx8gglOtrnzVKJPMeg9_rUhe18,3630
294
297
  fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
295
298
  fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
@@ -318,7 +321,7 @@ fusion_bench/models/llama/model_utils/mod.py,sha256=xzNOgTRfOK9q8kml4Q2nmSOl23f3
318
321
  fusion_bench/models/llama/model_utils/visual.py,sha256=wpqWqEASyA7WhJLCfC26h0Cdn5CXnwC1qPJUlSXggo4,8310
319
322
  fusion_bench/models/masks/__init__.py,sha256=vXG6jrBkDbPsnrX6nMEYAW1rQuGEWDgdjID7cKzXvrs,69
320
323
  fusion_bench/models/masks/mask_model.py,sha256=YXNZ_CGp6VPshZH__Znh6Z07BqOK53G-Ltc1LVy1E3I,5502
321
- fusion_bench/models/model_card_templates/default.md,sha256=Abd8tUhdZU-B5jwc7N6Gm0zLGNkfx6fr7MAL03VtFDg,885
324
+ fusion_bench/models/model_card_templates/default.md,sha256=DJXwDODCsqIOhkgP57-iCShxLYK_jnsDsJYH1GfbBY8,1028
322
325
  fusion_bench/models/modeling_deepseek_v2/__init__.py,sha256=trXrhtKb_gIxXVo7wSZ-il5sLJtDTiNZezRrEt3M8zM,505
323
326
  fusion_bench/models/modeling_deepseek_v2/configuration_deepseek.py,sha256=TblFOCfNwaXUnXnD-sxFhSn5Df-_yy2LMcrth-sBPFI,10301
324
327
  fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py,sha256=PtfkfPrfmQVoLiVhgqlp5toJAnCinPWfeZYeJJtWWBs,78676
@@ -362,7 +365,7 @@ fusion_bench/models/smile_moe/utils/svd_utils.py,sha256=A2u7lH5Bo2qhgwplHPAz56pd
362
365
  fusion_bench/models/surgery/__init__.py,sha256=tcUSi2m9GzGWfvRDQScIbdEbFBS_35gm9zGKN7VpE70,53
363
366
  fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNkEiPwNrN11ydyQQ1EZHehM,5133
364
367
  fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
365
- fusion_bench/models/wrappers/ensemble.py,sha256=MQ92yxI_D8AzzA8sbpZE-rp-vWxO0tTICFnF8Y1Gyss,6380
368
+ fusion_bench/models/wrappers/ensemble.py,sha256=T-DAKrAm-ciZwV6Hbt8uASbjtoQpHTlvVyan3rhk_8k,11632
366
369
  fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=A7LjG0inL5oeEVOkJwEUDM15v4dpQnsCq2y9zA78R3k,11198
367
370
  fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py,sha256=q5Hc4BtLpAawMbxsWJRL-8OR-x7994Jhr9IyN7vKZ9o,16930
368
371
  fusion_bench/models/wrappers/task_wise_fusion.py,sha256=ROLANdDq0bZ3sIROqIv3udPN8lzDdEwxD0Jonx-5ycw,17465
@@ -377,7 +380,7 @@ fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31Asmvwv
377
380
  fusion_bench/programs/base_program.py,sha256=Bl_bv8SawEUc-GBTtZFMoii0y-r-0hOXBAJkQFexWCU,3475
378
381
  fusion_bench/programs/fabric_fusion_program.py,sha256=jt0_tlg37a2jBl2YikaC0N71Gmr4J340wkKAekyT180,12453
379
382
  fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
380
- fusion_bench/scripts/cli.py,sha256=VwcwqY--kGDEGI1RoTQ5X32FaKducdRUKf2CZRXcfCM,2739
383
+ fusion_bench/scripts/cli.py,sha256=kEWLEkZEBqUr1_-XTePzNC5NM8lwWvgUBf0Lcuk_FI8,2739
381
384
  fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
382
385
  fusion_bench/scripts/nyuv2_mtl_train.py,sha256=W1C45R9NdF4O-UjCx1bUxRTdFE0-FlRpwJHZ5gY18rI,3602
383
386
  fusion_bench/scripts/webui.py,sha256=ryA-2leSnHcYA88tTAYzJGDhiljbi0vl1Fibejzndlw,14398
@@ -392,7 +395,7 @@ fusion_bench/taskpool/clip_vision/__init__.py,sha256=ItdyWYy2A5xQKzh1dXi9kbQTBig
392
395
  fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py,sha256=t_lmo8W-ZgLLOiBnF5CWfaLbKwz3EXfO8gCavI34qQY,3733
393
396
  fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py,sha256=UdI7npI53LjPV2B19tHymhbma6WYcZIvzhqaSyZKkSQ,4762
394
397
  fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py,sha256=8lZIG6tWpctYzme0Q_n6QcGnn9MeDmP3UX8nEv4_a9Q,4232
395
- fusion_bench/taskpool/clip_vision/taskpool.py,sha256=3JPN_1B9ylG0-Q69UELdQgakrgxRRQbj9x6LvTlw_J0,16177
398
+ fusion_bench/taskpool/clip_vision/taskpool.py,sha256=99F8w_e4-UnoeDkSjo0z_8Wstx6e635h0IqSdtfT7ms,16460
396
399
  fusion_bench/taskpool/clip_vision/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
397
400
  fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py,sha256=LY9wxWCm_4X7Ii0ZkMxhtbevz6OxS3Bkqz0puXhuRqM,2393
398
401
  fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
@@ -442,7 +445,7 @@ fusion_bench/utils/__init__.py,sha256=wNAfpP-u_-8HGbLaBoHT_wriU_cNvY4M_UXdBv2kXh
442
445
  fusion_bench/utils/auto.py,sha256=uACQLE62_kNyhl4BGduvcbyeTE61qXpIJx3Ccl8kh68,920
443
446
  fusion_bench/utils/cache_utils.py,sha256=-bTZijQgl4BuAx0VSJFD-bSDOXuq3o0NkrOaiLiyofU,4795
444
447
  fusion_bench/utils/data.py,sha256=aalB3kGbZUF-PZ_IaAhcXanRKhS-RNMT5mUrEBb4R3E,6722
445
- fusion_bench/utils/devices.py,sha256=i5g2FzFs-UWhekcwzxVUZBOw82pOP-RbjIISbfWnuoM,8357
448
+ fusion_bench/utils/devices.py,sha256=6AkGcs3flt0FSo9yfEREuehoTrgcc65gkwpTWQy8XsI,9546
446
449
  fusion_bench/utils/dict.py,sha256=ZCK0CRRT_B1Z18WY_GOYcmth7k5x9Jn1k7XhAVWRu98,1379
447
450
  fusion_bench/utils/dtype.py,sha256=z6UlPGF9dzG4Ik8rXGf59PJk_RKzG6Trp8O6wcBS9PU,4360
448
451
  fusion_bench/utils/expr.py,sha256=zwHNrtIbOMnIChU-0ZI5qLbDva8zvHbizL-4F2TwM14,2386
@@ -452,14 +455,14 @@ fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqk
452
455
  fusion_bench/utils/instantiate_utils.py,sha256=OXkfhq_o3Sgy5n3Psf-HI-dIfbK9oD2GBdfcx3gT63Q,17526
453
456
  fusion_bench/utils/json.py,sha256=sVCqbm9mmyHybiui-O57KFt_ULrjLtN2wipSo6VDvqE,2533
454
457
  fusion_bench/utils/lazy_imports.py,sha256=dg4Uu8FaoEu0WGVTo5o_PbLZs3Ei_RG75Ta-Us1iPW4,3500
455
- fusion_bench/utils/lazy_state_dict.py,sha256=9dse7U3QZNvNxBINb02Q9DW2_-voUh2Ri2B6hk9wvNI,20227
458
+ fusion_bench/utils/lazy_state_dict.py,sha256=srEKyctbuBW3yrVFSG7Tki_XkBwoc6eUmDXLxHXqX0o,20328
456
459
  fusion_bench/utils/misc.py,sha256=93q0m-HYWkPK91Co5lll_J0Dxs6YahW2lD_X8fUAyTk,2420
457
460
  fusion_bench/utils/modelscope.py,sha256=P8fV6Eff8oP0LVGIFGbLvuk8MBteysN438djZ6ZEfE4,10699
458
461
  fusion_bench/utils/packages.py,sha256=wKl-qtPjA61LrdgTTusuNyvs8jfUv4mA5IwPTFWyYtA,2139
459
462
  fusion_bench/utils/parameters.py,sha256=ufEDOYJwcQQxLfveK8hBAGwpu5J3LA_cTWiDgZ2zkJ0,11788
460
463
  fusion_bench/utils/path.py,sha256=qrfgar3b-6_2v032-2hTt97L6qdtG7zc3CFrGFyKSGE,2400
461
464
  fusion_bench/utils/pylogger.py,sha256=r2KXTvq-j8uHdjBBoVPOgkjv4c6pyhbX6xf1JbOsF4w,3335
462
- fusion_bench/utils/rich_utils.py,sha256=XNPUpa1grna_C0MLQs0nY25-Kfutpj9BOEzvjoH7nR0,5849
465
+ fusion_bench/utils/rich_utils.py,sha256=24RF-OHK6h9ggZ95csw_vMU8YtxYNOxlzjcH7dpuESY,5863
463
466
  fusion_bench/utils/set.py,sha256=_43ZvGKJ_BK9sUslsSNhi7xEfuAQuyj3vViImnGpnCY,134
464
467
  fusion_bench/utils/state_dict_arithmetic.py,sha256=fczHDEpL2_UmxNIdvQtllXvBWBcmKpw-p6CIS_upjwI,11818
465
468
  fusion_bench/utils/tensorboard.py,sha256=9fkgNYR9LM38nPNkudcxL9TjLUseW-280M0k2nLff7o,1669
@@ -472,7 +475,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
472
475
  fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
473
476
  fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
474
477
  fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
475
- fusion_bench-0.2.23.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
478
+ fusion_bench-0.2.24.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
476
479
  fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
477
480
  fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
478
481
  fusion_bench_config/fabric_model_fusion.yaml,sha256=U8BxsaOvsg9bsEZcIpBE-feo9n9G7Y1kQDHqPVxUYAg,2601
@@ -621,7 +624,7 @@ fusion_bench_config/method/dare/ties_merging.yaml,sha256=7gDW4XpezrsccsbJGqqKrbX
621
624
  fusion_bench_config/method/dawe/dawe_for_clip.yaml,sha256=99P5xpp1YGvIwXGxDcxRtJMLE2FhvEFmFBQjOMEcGoc,1023
622
625
  fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=CtZI3YPMJNDy225yhOJbSiMKlsc-X5nCFzmVh0dvr-w,78
623
626
  fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=khdpCvKMNytx4nZSgtUJFXv44MVytXu0aqUVd9TixXo,57
624
- fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=Ih9dqifpnvxW2QfJqp8Q8S8W1k7VZG9ulyPxkcuaWsw,54
627
+ fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=RKa3IgN3DfFZVmeXVIdTt0NdPVV0jFkpQz6SxLs3Kso,124
625
628
  fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=2KD3PjFglqL7fjqhjXtOWxZ1mvmYodiNVroXsFd7EGE,261
626
629
  fusion_bench_config/method/expert_sparsity/README.md,sha256=CLE0-XblXDWCUTHPaTNtBH-YquXn-uawwTJiYrgjMaA,239
627
630
  fusion_bench_config/method/expert_sparsity/mixtral.yaml,sha256=maFL3LM0zfnQ1eXoNXUslSjgZmpOdUJgl_a31dYUBbc,605
@@ -638,8 +641,9 @@ fusion_bench_config/method/linear/expo.yaml,sha256=St3NW6cKVRV3vCn8y0gxQ8k66VTdt
638
641
  fusion_bench_config/method/linear/linear_interpolation.yaml,sha256=chM6_HRKKcMleTeuKY3-YNI1qaMG2CfnsRwUxAlHsRw,66
639
642
  fusion_bench_config/method/linear/llama_expo.yaml,sha256=SvqamjT06BMObQ58sks5x7Wv6kGpp3-Nlw3ihbD_kSA,621
640
643
  fusion_bench_config/method/linear/llama_expo_with_dare.yaml,sha256=Pp8s2xmEg5XSvaGKtwTYx_PzcGvwRh2gPpZ6u9as4_E,383
641
- fusion_bench_config/method/linear/simple_average_for_llama.yaml,sha256=r2Zul2GaMEEQ7NEDf8yiAgEiMDPNibU4qsJ0toD2KjQ,319
642
- fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml,sha256=N7cyHm6a2QwNsV9uaJp-eZmdbs9kmdRrkxtO58QQQgM,116
644
+ fusion_bench_config/method/linear/simple_average_for_causallm.yaml,sha256=qqeIr61PJEcfZclZ5vV64GCzyt-8b1zB0FDZu8DsbXQ,322
645
+ fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml,sha256=tJA0n0_XVvll4rZYVHQVqFCz8W3Bey6NjPKMIH3-P0U,142
646
+ fusion_bench_config/method/linear/ties_merging_for_causallm.yaml,sha256=1oEIdxV0OqWjDQ9V_lmXEPUayp4KbKHE2SvpCLmiKOU,489
643
647
  fusion_bench_config/method/linear/weighted_average.yaml,sha256=uq2gHGCwVHHSa1H-hzcrSlumUTLJ50tfyiY1Mh1pFsk,186
644
648
  fusion_bench_config/method/linear/weighted_average_for_llama.yaml,sha256=se2aq6t5R1f-ZG6ubUyRr__DBe9BzXrgL81ua3DkQoM,498
645
649
  fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml,sha256=QHsRfJK9K4KajsX3LBHG8cDt7ZLJWxOBnJjpHRQSB_s,1348
@@ -686,6 +690,7 @@ fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=-Ipc05T
686
690
  fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml,sha256=KIKUr_Q4e9pJSVlqUFatuLp5vg8kNEsn8tOE4R77sxA,653
687
691
  fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=mMVaFJWUZmIdhg0kVQY20i7cmgTMrOSgoSpmW7quRzc,993
688
692
  fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
693
+ fusion_bench_config/method/wudi/wudi.yaml,sha256=3mJ6-XKHwwHALS3d503ybGM7pc1PhEK91YwwMybuzMc,76
689
694
  fusion_bench_config/model/clip-vit/README.md,sha256=-s34C9X7pxy55xSc24kbf-4ctK7UC-Wpu_JWIe9O0Ko,1382
690
695
  fusion_bench_config/model/clip-vit/clip-vit-base-patch16.yaml,sha256=Fn7or7-5fVZNyp6fH1lkwk7mq7iVhpR3sMt6Sm7Yg6I,43
691
696
  fusion_bench_config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml,sha256=8G2OCCDaSJkzDOMDsV08NE-Z5YWMjDsFVs1WY3OWNss,787
@@ -841,9 +846,11 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_
841
846
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml,sha256=FuPWQbC9xEV5wZjuo835gOMNgbzmpK9RbjFjA_HOzqo,2476
842
847
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml,sha256=9PCkbrNnQSKTsm4eoUvVgjGd3IY7wHBC4LWj4kOdY4Y,1406
843
848
  fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=bqnyzgwIvDtV3Fb-uLf9mdFv0NW1C392lxGsGUPLsKE,400
844
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml,sha256=D8HdBRGUYD-c-c38oSgzcP3fkNhBN-tVdqLnS_B-7zc,265
849
+ fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_code.yaml,sha256=4DoMFlGabtwZXZMGWsWtkP2rlGOx_1eEPp_AyqyVln0,263
850
+ fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml,sha256=ofFFVYKHKtylxd90REMLhhP57Yqwe2AEbGuZ0mBCVz8,305
845
851
  fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml,sha256=Nxk72MurqSzEyPJzGoKFbk5T2TGWBwYpH2V9Jzqt648,229
846
852
  fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml,sha256=8gr8ZtgegSHV0GHtJBiEgdYbRe8UHhO4_y8dayxZChk,506
853
+ fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml,sha256=mm7A3NilcANJBuCZMt3MMLKFm7CjBhMYWAa9TXjM_PQ,326
847
854
  fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=oDsZkuAoh1mWUC7jZNzw8794zgX2bV5Z0esXpvbTs-c,643
848
855
  fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk3_bQiciMRlNsO5vp6AKHQM_-g-8bmU8251w,641
849
856
  fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=H3UwSk4ChsGSrH49LuttxldFURW-4RVUtnIa0ClHKXo,802
@@ -927,8 +934,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=3q-KMuFaM
927
934
  fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
928
935
  fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
929
936
  fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
930
- fusion_bench-0.2.23.dist-info/METADATA,sha256=HQZ3DxHk-Jtcj2AZT49tx5m7VdVkDEglivhkfQv258Q,22384
931
- fusion_bench-0.2.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
932
- fusion_bench-0.2.23.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
933
- fusion_bench-0.2.23.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
934
- fusion_bench-0.2.23.dist-info/RECORD,,
937
+ fusion_bench-0.2.24.dist-info/METADATA,sha256=DllRpMnvVgyeqjN_YlNeo7IlqukzOjuYO_cWopOo1tA,22621
938
+ fusion_bench-0.2.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
939
+ fusion_bench-0.2.24.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
940
+ fusion_bench-0.2.24.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
941
+ fusion_bench-0.2.24.dist-info/RECORD,,
@@ -1 +1,2 @@
1
1
  _target_: fusion_bench.method.SimpleEnsembleAlgorithm
2
+ device_map: null # Set to null for single device, or specify mapping
@@ -1,4 +1,4 @@
1
- _target_: fusion_bench.method.SimpleAverageForLlama
1
+ _target_: fusion_bench.method.SimpleAverageForCausalLM
2
2
  # set `merge_backbone` to true if you have a base model and only want to merge the backbone of the experts
3
3
  # if `merge_backbone` is False, this is equivalent to `SimpleAverageAlgorithm`
4
4
  merge_backbone: false
@@ -0,0 +1,4 @@
1
+ _target_: fusion_bench.method.TaskArithmeticForCausalLM
2
+ scaling_factor: 0.3
3
+ merge_backbone: false
4
+ model_save_path: ${path.log_dir}/checkpoint
@@ -0,0 +1,13 @@
1
+ _target_: fusion_bench.method.TiesMergingForCausalLM
2
+ # Scaling factor $\lambda$
3
+ scaling_factor: 0.3
4
+ # Threshold for resetting values in the task vector
5
+ threshold: 20
6
+ # List of keys to remove from the state dict, default is empty
7
+ remove_keys: []
8
+ # Function to merge the models, default is sum. Options are 'sum', 'mean', and 'max'
9
+ merge_func: sum
10
+ # Whether to merge only the backbone layers
11
+ merge_backbone: false
12
+ # Path to save the merged model
13
+ model_save_path: ${path.log_dir}/checkpoint
@@ -0,0 +1,4 @@
1
+ _target_: fusion_bench.method.WUDIMerging
2
+
3
+ iter_num: 400
4
+ exclude_keys: null
@@ -1,7 +1,6 @@
1
1
  _target_: fusion_bench.modelpool.CausalLMPool
2
2
  _recursive_: false
3
-
4
- enable_lazy_loading: false
3
+ enable_lazy_loading: true
5
4
  models:
6
5
  _pretrained_: Qwen/Qwen2.5-1.5B
7
6
  math: Qwen/Qwen2.5-Math-1.5B
@@ -0,0 +1,11 @@
1
+ _target_: fusion_bench.modelpool.CausalLMPool
2
+ _recursive_: false
3
+ enable_lazy_loading: true
4
+ models:
5
+ _pretrained_: Qwen/Qwen2.5-1.5B
6
+ math: Qwen/Qwen2.5-Math-1.5B
7
+ code: Qwen/Qwen2.5-Coder-1.5B
8
+ instruction: Qwen/Qwen2.5-1.5B-Instruct
9
+ model_kwargs:
10
+ torch_dtype: bfloat16
11
+ tokenizer: Qwen/Qwen2.5-1.5B
@@ -0,0 +1,11 @@
1
+ _target_: fusion_bench.modelpool.CausalLMPool
2
+ _recursive_: false
3
+ enable_lazy_loading: true
4
+ models:
5
+ _pretrained_: meta-llama/Llama-2-7b-hf
6
+ chat: meta-llama/Llama-2-7b-chat-hf
7
+ math: WizardLMTeam/WizardMath-7B-V1.0
8
+ code: codellama/CodeLlama-7b-hf
9
+ model_kwargs:
10
+ torch_dtype: bfloat16
11
+ tokenizer: meta-llama/Llama-2-7b-hf
@@ -1,4 +0,0 @@
1
- _target_: fusion_bench.method.TaskArithmeticForLlama
2
- scaling_factor: 0.3
3
- merge_backbone: true
4
- model_save_path: null