fusion-bench 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. fusion_bench/__init__.py +22 -2
  2. fusion_bench/_get_started/__init__.py +3 -0
  3. fusion_bench/_get_started/greeting_program.py +49 -0
  4. fusion_bench/compat/method/base_algorithm.py +14 -0
  5. fusion_bench/constants/__init__.py +6 -0
  6. fusion_bench/constants/clip_vision.py +26 -2
  7. fusion_bench/constants/paths.py +4 -0
  8. fusion_bench/constants/runtime.py +57 -0
  9. fusion_bench/dataset/clip_dataset.py +2 -1
  10. fusion_bench/dataset/gpt2_glue.py +9 -9
  11. fusion_bench/dataset/image_corruption/__init__.py +0 -0
  12. fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
  13. fusion_bench/dataset/image_dataset.py +1 -1
  14. fusion_bench/dataset/nyuv2.py +2 -2
  15. fusion_bench/method/__init__.py +24 -5
  16. fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
  17. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
  18. fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
  19. fusion_bench/method/base_algorithm.py +195 -12
  20. fusion_bench/method/bitdelta/__init__.py +5 -0
  21. fusion_bench/method/bitdelta/bitdelta.py +156 -0
  22. fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
  23. fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
  24. fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
  25. fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
  26. fusion_bench/method/classification/clip_finetune.py +1 -1
  27. fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
  28. fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
  29. fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
  30. fusion_bench/method/doge_ta/doge_ta.py +1 -1
  31. fusion_bench/method/ensemble.py +12 -12
  32. fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
  33. fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -6
  34. fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
  35. fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
  36. fusion_bench/method/fw_merging/fw_hard.py +1 -1
  37. fusion_bench/method/fw_merging/fw_soft.py +1 -1
  38. fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
  39. fusion_bench/method/linear/expo.py +2 -1
  40. fusion_bench/method/linear/linear_interpolation.py +6 -4
  41. fusion_bench/method/linear/simple_average_for_llama.py +17 -13
  42. fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
  43. fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
  44. fusion_bench/method/model_recombination.py +2 -5
  45. fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
  46. fusion_bench/method/moe_pruner/utils/data.py +2 -1
  47. fusion_bench/method/moe_pruner/utils/prune.py +6 -1
  48. fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
  49. fusion_bench/method/pruning/wanda_utils/data.py +1 -2
  50. fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
  51. fusion_bench/method/randes/modelsoup.py +1 -3
  52. fusion_bench/method/regmean/clip_regmean.py +2 -2
  53. fusion_bench/method/regmean/gpt2_regmean.py +3 -10
  54. fusion_bench/method/regmean/regmean.py +2 -11
  55. fusion_bench/method/regmean_plusplus/__init__.py +1 -1
  56. fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +24 -17
  57. fusion_bench/method/regmean_plusplus/regmean_plusplus.py +56 -38
  58. fusion_bench/method/simple_average.py +12 -16
  59. fusion_bench/method/slerp/slerp.py +5 -2
  60. fusion_bench/method/smile_upscaling/causal_lm_upscaling.py +371 -0
  61. fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
  62. fusion_bench/method/smile_upscaling/projected_energy.py +144 -0
  63. fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +5 -1
  64. fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +71 -51
  65. fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
  66. fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
  67. fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
  68. fusion_bench/method/ties_merging/ties_merging.py +13 -26
  69. fusion_bench/method/we_moe/__init__.py +1 -0
  70. fusion_bench/method/we_moe/clip_we_moe.py +5 -4
  71. fusion_bench/method/we_moe/entropy_loss.py +25 -0
  72. fusion_bench/method/we_moe/flan_t5_we_moe.py +331 -0
  73. fusion_bench/method/we_moe/utils.py +15 -0
  74. fusion_bench/method/we_moe/we_moe.py +6 -6
  75. fusion_bench/method/weighted_average/llama.py +4 -16
  76. fusion_bench/metrics/continual_learning/__init__.py +1 -0
  77. fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
  78. fusion_bench/metrics/nyuv2/__init__.py +2 -2
  79. fusion_bench/metrics/nyuv2/segmentation.py +1 -1
  80. fusion_bench/mixins/__init__.py +10 -2
  81. fusion_bench/mixins/clip_classification.py +15 -45
  82. fusion_bench/mixins/hydra_config.py +105 -7
  83. fusion_bench/mixins/lightning_fabric.py +2 -0
  84. fusion_bench/mixins/serialization.py +275 -48
  85. fusion_bench/modelpool/__init__.py +2 -2
  86. fusion_bench/modelpool/base_pool.py +29 -9
  87. fusion_bench/modelpool/causal_lm/causal_lm.py +41 -33
  88. fusion_bench/modelpool/clip_vision/modelpool.py +1 -3
  89. fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
  90. fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
  91. fusion_bench/models/__init__.py +7 -1
  92. fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
  93. fusion_bench/models/hf_utils.py +160 -0
  94. fusion_bench/models/linearized/linearized_model_utils.py +4 -4
  95. fusion_bench/models/linearized/vision_model.py +1 -1
  96. fusion_bench/models/model_card_templates/default.md +46 -0
  97. fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
  98. fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
  99. fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
  100. fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
  101. fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
  102. fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
  103. fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
  104. fusion_bench/models/modeling_smile_llama/__init__.py +7 -0
  105. fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
  106. fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +698 -0
  107. fusion_bench/models/modeling_smile_llama/register.py +8 -0
  108. fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
  109. fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
  110. fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +7 -12
  111. fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
  112. fusion_bench/models/parameter_dict.py +1 -1
  113. fusion_bench/models/sparse_we_moe.py +1 -53
  114. fusion_bench/models/utils.py +26 -0
  115. fusion_bench/models/we_moe.py +1 -53
  116. fusion_bench/models/wrappers/ensemble.py +6 -4
  117. fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
  118. fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
  119. fusion_bench/programs/base_program.py +81 -2
  120. fusion_bench/programs/fabric_fusion_program.py +46 -61
  121. fusion_bench/scripts/cli.py +38 -5
  122. fusion_bench/taskpool/base_pool.py +4 -3
  123. fusion_bench/taskpool/clip_vision/taskpool.py +43 -22
  124. fusion_bench/taskpool/dummy.py +1 -1
  125. fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
  126. fusion_bench/tasks/clip_classification/__init__.py +6 -4
  127. fusion_bench/utils/__init__.py +7 -1
  128. fusion_bench/utils/cache_utils.py +101 -1
  129. fusion_bench/utils/devices.py +14 -4
  130. fusion_bench/utils/fabric.py +2 -2
  131. fusion_bench/utils/instantiate_utils.py +3 -1
  132. fusion_bench/utils/lazy_imports.py +23 -0
  133. fusion_bench/utils/lazy_state_dict.py +38 -3
  134. fusion_bench/utils/modelscope.py +127 -8
  135. fusion_bench/utils/parameters.py +2 -2
  136. fusion_bench/utils/path.py +56 -0
  137. fusion_bench/utils/pylogger.py +1 -1
  138. fusion_bench/utils/rich_utils.py +3 -0
  139. fusion_bench/utils/state_dict_arithmetic.py +25 -23
  140. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/METADATA +24 -47
  141. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/RECORD +184 -145
  142. fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
  143. fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
  144. fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
  145. fusion_bench_config/_get_started/greeting_program.yaml +4 -0
  146. fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
  147. fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
  148. fusion_bench_config/fabric_model_fusion.yaml +45 -17
  149. fusion_bench_config/hydra/default.yaml +6 -2
  150. fusion_bench_config/llama_full_finetune.yaml +1 -0
  151. fusion_bench_config/method/adamerging/clip.yaml +1 -1
  152. fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
  153. fusion_bench_config/method/depth_upscaling.yaml +4 -1
  154. fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -1
  155. fusion_bench_config/method/linear/simple_average_for_llama.yaml +3 -2
  156. fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +21 -0
  157. fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
  158. fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
  159. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +2 -1
  160. fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml +20 -0
  161. fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
  162. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
  163. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +1 -1
  164. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -6
  165. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +1 -1
  166. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +1 -1
  167. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +3 -3
  168. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
  169. fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
  170. fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
  171. fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
  172. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
  173. fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
  174. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
  175. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
  176. fusion_bench_config/nyuv2_config.yaml +3 -1
  177. fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
  178. fusion_bench_config/path/default.yaml +28 -0
  179. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
  180. fusion_bench_config/method/adamerging.yaml +0 -23
  181. fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
  182. fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
  183. fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
  184. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/WHEEL +0 -0
  185. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/entry_points.txt +0 -0
  186. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/licenses/LICENSE +0 -0
  187. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/top_level.txt +0 -0
  188. /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
fusion_bench/utils/modelscope.py

@@ -2,27 +2,37 @@ import os
 from typing import Literal, Optional
 
 from datasets import load_dataset as datasets_load_dataset
+
 from fusion_bench.utils import validate_and_suggest_corrections
 
 try:
+    from modelscope import dataset_file_download as modelscope_dataset_file_download
+    from modelscope import model_file_download as modelscope_model_file_download
     from modelscope import snapshot_download as modelscope_snapshot_download
+
 except ImportError:
 
-    def modelscope_snapshot_download(*args, **kwargs):
+    def _raise_modelscope_not_installed_error(*args, **kwargs):
         raise ImportError(
             "ModelScope is not installed. Please install it using `pip install modelscope` to use ModelScope models."
         )
 
+    modelscope_snapshot_download = _raise_modelscope_not_installed_error
+    modelscope_model_file_download = _raise_modelscope_not_installed_error
+    modelscope_dataset_file_download = _raise_modelscope_not_installed_error
 
 try:
+    from huggingface_hub import hf_hub_download
     from huggingface_hub import snapshot_download as huggingface_snapshot_download
 except ImportError:
 
-    def huggingface_snapshot_download(*args, **kwargs):
+    def _raise_huggingface_not_installed_error(*args, **kwargs):
         raise ImportError(
             "Hugging Face Hub is not installed. Please install it using `pip install huggingface_hub` to use Hugging Face models."
         )
 
+    huggingface_snapshot_download = _raise_huggingface_not_installed_error
+    hf_hub_download = _raise_huggingface_not_installed_error
 
 __all__ = [
     "load_dataset",
@@ -32,6 +42,12 @@ __all__ = [
 AVAILABLE_PLATFORMS = ["hf", "huggingface", "modelscope"]
 
 
+def _raise_unknown_platform_error(platform: str):
+    raise ValueError(
+        f"Unsupported platform: {platform}. Supported platforms are 'hf', 'huggingface', and 'modelscope'."
+    )
+
+
 def load_dataset(
     name: str,
     split: str = "train",
@@ -55,9 +71,7 @@ def load_dataset(
         dataset_dir = modelscope_snapshot_download(name, repo_type="dataset")
         return datasets_load_dataset(dataset_dir, split=split)
     else:
-        raise ValueError(
-            f"Unsupported platform: {platform}. Supported platforms are 'hf', 'huggingface', and 'modelscope'."
-        )
+        _raise_unknown_platform_error(platform)
 
 
 def resolve_repo_path(
@@ -138,9 +152,114 @@ def resolve_repo_path(
                 repo_id=repo_id, repo_type=repo_type, **kwargs
             )
         else:
-            raise ValueError(
-                f"Unsupported platform: {platform}. Supported platforms are 'hf', 'huggingface', and 'modelscope'."
-            )
+            _raise_unknown_platform_error(platform)
         return local_path
     except Exception as e:
         raise FileNotFoundError(f"Could not resolve checkpoint: {repo_id}. Error: {e}")
+
+
+def resolve_file_path(
+    repo_id: str,
+    filename: str,
+    repo_type: Literal["model", "dataset"] = "model",
+    platform: Literal["hf", "huggingface", "modelscope"] = "hf",
+    **kwargs,
+) -> str:
+    """
+    Resolve and download a specific file from a repository across multiple platforms.
+
+    This function downloads a specific file from repositories hosted on various platforms
+    including local paths, Hugging Face Hub, and ModelScope. It handles platform-specific
+    URL prefixes and automatically determines the appropriate download method.
+
+    Args:
+        repo_id (str): Repository identifier. Can be:
+            - Local directory path (file will be joined with this path if it exists)
+            - Hugging Face model/dataset ID (e.g., "bert-base-uncased")
+            - ModelScope model/dataset ID
+            - URL-prefixed ID (e.g., "hf://model-name", "modelscope://model-name").
+              The prefix will override the platform argument.
+        filename (str): The specific file to download from the repository.
+        repo_type (Literal["model", "dataset"], optional): Type of repository.
+            Defaults to "model". Used for ModelScope platform to determine the
+            correct download function.
+        platform (Literal["hf", "huggingface", "modelscope"], optional):
+            Platform to download from. Defaults to "hf". Options:
+            - "hf" or "huggingface": Hugging Face Hub
+            - "modelscope": ModelScope platform
+        **kwargs: Additional arguments passed to the underlying download functions
+            (e.g., cache_dir, force_download, use_auth_token).
+
+    Returns:
+        str: Local path to the downloaded file.
+
+    Raises:
+        ValueError: If an unsupported repo_type is specified for ModelScope platform.
+        ImportError: If required dependencies for the specified platform are not installed.
+        FileNotFoundError: If the file cannot be found or downloaded.
+
+    Examples:
+        >>> # Download config.json from a Hugging Face model
+        >>> resolve_file_path("bert-base-uncased", "config.json")
+        "/home/user/.cache/huggingface/hub/models--bert-base-uncased/.../config.json"
+
+        >>> # Download from ModelScope
+        >>> resolve_file_path(
+        ...     "damo/nlp_bert_backbone_base_std",
+        ...     "pytorch_model.bin",
+        ...     platform="modelscope"
+        ... )
+        "/home/user/.cache/modelscope/hub/.../pytorch_model.bin"
+
+        >>> # Local file path
+        >>> resolve_file_path("/path/to/local/model", "config.json")
+        "/path/to/local/model/config.json"
+
+        >>> # URL-prefixed repository
+        >>> resolve_file_path("hf://microsoft/DialoGPT-medium", "config.json")
+        "/home/user/.cache/huggingface/hub/.../config.json"
+
+        >>> # Download dataset file from ModelScope
+        >>> resolve_file_path(
+        ...     "DAMO_NLP/jd",
+        ...     "train.json",
+        ...     repo_type="dataset",
+        ...     platform="modelscope"
+        ... )
+        "/home/user/.cache/modelscope/datasets/.../train.json"
+    """
+    # If it's a HuggingFace Hub model id, download snapshot
+    if repo_id.startswith("hf://") or repo_id.startswith("huggingface://"):
+        repo_id = repo_id.replace("hf://", "").replace("huggingface://", "")
+        platform = "hf"
+    # If it's a ModelScope model id, download snapshot
+    elif repo_id.startswith("modelscope://"):
+        repo_id = repo_id.replace("modelscope://", "")
+        platform = "modelscope"
+
+    # If it's a local file or directory, return as is
+    if os.path.exists(repo_id):
+        return os.path.join(repo_id, filename)
+
+    if platform in ["hf", "huggingface"]:
+        return hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            repo_type=repo_type,
+            **kwargs,
+        )
+    elif platform == "modelscope":
+        if repo_type == "model":
+            return modelscope_model_file_download(
+                model_id=repo_id, file_path=filename, **kwargs
+            )
+        elif repo_type == "dataset":
+            return modelscope_dataset_file_download(
+                dataset_id=repo_id, file_path=filename, **kwargs
+            )
+        else:
+            raise ValueError(
+                f"Unsupported repo_type: {repo_type}. Supported types are 'model' and 'dataset'."
+            )
+    else:
+        _raise_unknown_platform_error(platform)
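For context, a minimal sketch of how the new `resolve_file_path` helper might be called, assuming it is importable from `fusion_bench.utils.modelscope` (the module shown above); the repository IDs and file names are illustrative and taken from the docstring examples:

```python
# Hypothetical usage sketch; repository IDs and file names are illustrative.
from fusion_bench.utils.modelscope import resolve_file_path

# Hugging Face Hub is the default platform: download (or reuse a cached copy of)
# a single file and get back its local path.
config_path = resolve_file_path("bert-base-uncased", "config.json")

# A "modelscope://" prefix overrides the `platform` argument, so the same call
# can target a ModelScope repository instead.
weights_path = resolve_file_path(
    "modelscope://damo/nlp_bert_backbone_base_std", "pytorch_model.bin"
)

print(config_path)
print(weights_path)
```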
fusion_bench/utils/parameters.py

@@ -1,6 +1,6 @@
 import copy
 from collections import OrderedDict
-from typing import List, Mapping, Optional, Union
+from typing import Dict, List, Mapping, Optional, Union
 
 import torch
 from torch import nn
@@ -83,7 +83,7 @@ def vector_to_state_dict(
     vector: torch.Tensor,
     state_dict: Union[StateDictType, nn.Module],
     remove_keys: Optional[List[str]] = None,
-):
+) -> Dict[str, torch.Tensor]:
     """
     Convert a vector to a state dictionary.
 
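The new return annotation clarifies that `vector_to_state_dict` reshapes a flat parameter vector back into a named state dict. A minimal sketch, assuming the function is importable from `fusion_bench.utils.parameters` (the file listed above) and that the vector is laid out in state-dict order:

```python
import torch
from torch import nn

# Assumed import path, based on the file listing above.
from fusion_bench.utils.parameters import vector_to_state_dict

model = nn.Linear(4, 2)

# Flatten all parameters into one 1-D tensor, in state-dict order (an assumption).
flat = torch.cat([p.detach().flatten() for p in model.state_dict().values()])

# Rebuild a state dict with the same keys and shapes as the reference module,
# then load it back into the model.
restored = vector_to_state_dict(flat, model)
model.load_state_dict(restored)
```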
fusion_bench/utils/path.py

@@ -1,6 +1,9 @@
+import logging
 import os
 from typing import List
 
+log = logging.getLogger(__name__)
+
 
 def path_is_dir_and_not_empty(path: str):
     if path is None:
@@ -20,3 +23,56 @@ def listdir_fullpath(dir: str) -> List[str]:
     assert os.path.isdir(dir), "Argument 'dir' must be a Directory"
     names = os.listdir(dir)
     return [os.path.join(dir, name) for name in names]
+
+
+def create_symlink(src_dir: str, dst_dir: str, link_name: str = None):
+    """
+    Creates a symbolic link from src_dir to dst_dir.
+
+    Args:
+        src_dir (str): The source directory to link to.
+        dst_dir (str): The destination directory where the symlink will be created.
+        link_name (str, optional): The name of the symlink. If None, uses the basename of src_dir.
+
+    Raises:
+        OSError: If the symbolic link creation fails.
+        ValueError: If src_dir does not exist or is not a directory.
+    """
+    if not os.path.exists(src_dir):
+        raise ValueError(f"Source directory does not exist: {src_dir}")
+
+    if not os.path.isdir(src_dir):
+        raise ValueError(f"Source path is not a directory: {src_dir}")
+
+    # Avoid creating symlink if source and destination are the same
+    if os.path.abspath(src_dir) == os.path.abspath(dst_dir):
+        log.warning(
+            "Source and destination directories are the same, skipping symlink creation"
+        )
+        return
+
+    # Create destination directory if it doesn't exist
+    os.makedirs(dst_dir, exist_ok=True)
+
+    # Determine link name
+    if link_name is None:
+        link_name = os.path.basename(src_dir)
+
+    link_path = os.path.join(dst_dir, link_name)
+
+    try:
+        # if the system is windows, use the `mklink` command in "CMD" to create the symlink
+        if os.name == "nt":
+            os.system(
+                f"mklink /J {os.path.abspath(link_path)} {os.path.abspath(src_dir)}"
+            )
+        else:
+            os.symlink(
+                src_dir,
+                link_path,
+                target_is_directory=True,
+            )
+        log.info(f"Created symbolic link: {link_path} -> {src_dir}")
+    except OSError as e:
+        log.warning(f"Failed to create symbolic link: {e}")
+        raise
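A minimal usage sketch for the new `create_symlink` helper, assuming the import path `fusion_bench.utils.path` from the file listing above; the directory names are hypothetical. On Windows the function falls back to an NTFS junction via `mklink /J`:

```python
import os

# Assumed import path; directory names below are hypothetical.
from fusion_bench.utils.path import create_symlink

src = os.path.abspath("outputs/checkpoints/run_001")
os.makedirs(src, exist_ok=True)

# Creates outputs/latest/run_001 pointing at the checkpoint directory
# (link_name defaults to the basename of src_dir when omitted).
create_symlink(src_dir=src, dst_dir="outputs/latest")
```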
fusion_bench/utils/pylogger.py

@@ -74,7 +74,7 @@ RankZeroLogger.manager = logging.Manager(RankZeroLogger.root)
 RankZeroLogger.manager.setLoggerClass(RankZeroLogger)
 
 
-def getRankZeroLogger(name=None):
+def get_rankzero_logger(name=None):
     """
     Return a logger with the specified name, creating it if necessary.
 
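The logger factory above is renamed from the camelCase `getRankZeroLogger` to the snake_case `get_rankzero_logger`. A one-line sketch of the intended call pattern, assuming the module path `fusion_bench.utils.pylogger`:

```python
# Assumed import path; with a RankZeroLogger, records are intended to be
# emitted only from the rank-zero process in distributed runs.
from fusion_bench.utils.pylogger import get_rankzero_logger

log = get_rankzero_logger(__name__)
log.info("initialized model fusion experiment")
```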
fusion_bench/utils/rich_utils.py

@@ -189,6 +189,9 @@ if __name__ == "__main__":
 
 
 def setup_colorlogging(force=False, **config_kwargs):
+    """
+    Sets up color logging for the application.
+    """
     FORMAT = "%(message)s"
 
     logging.basicConfig(
fusion_bench/utils/state_dict_arithmetic.py

@@ -44,7 +44,7 @@ def state_dicts_check_keys(state_dicts: List[StateDictType]):
         assert keys == set(state_dict.keys()), "keys of state_dicts are not equal"
 
 
-def num_params_of_state_dict(state_dict: StateDictType):
+def num_params_of_state_dict(state_dict: StateDictType) -> int:
     """
     Returns the number of parameters in a state dict.
 
@@ -57,7 +57,7 @@ def num_params_of_state_dict(state_dict: StateDictType):
     return sum([state_dict[key].numel() for key in state_dict])
 
 
-def state_dict_flatten(state_dict: Dict[str, Tensor]):
+def state_dict_flatten(state_dict: Dict[str, Tensor]) -> Tensor:
     """
     Flattens a state dict.
 
@@ -73,7 +73,7 @@ def state_dict_flatten(state_dict: Dict[str, Tensor]):
     return torch.cat(flattened_state_dict)
 
 
-def state_dict_avg(state_dicts: List[StateDictType]):
+def state_dict_avg(state_dicts: List[StateDictType]) -> StateDictType:
     """
     Returns the average of a list of state dicts.
 
@@ -100,7 +100,7 @@ def state_dict_avg(state_dicts: List[StateDictType]):
 
 def state_dict_sub(
     a: StateDictType, b: StateDictType, strict: bool = True, device=None
-):
+) -> StateDictType:
     """
     Returns the difference between two state dicts `a-b`.
 
@@ -130,7 +130,7 @@ def state_dict_add(
     strict: bool = True,
     device=None,
     show_pbar: bool = False,
-):
+) -> StateDictType:
     """
     Returns the sum of two state dicts.
 
@@ -156,14 +156,14 @@ def state_dict_add(
     return ans
 
 
-def state_dict_add_scalar(a: StateDictType, scalar: Number):
+def state_dict_add_scalar(a: StateDictType, scalar: Number) -> StateDictType:
     ans = OrderedDict()
     for key in a:
         ans[key] = a[key] + scalar
     return ans
 
 
-def state_dict_mul(state_dict: StateDictType, scalar: float):
+def state_dict_mul(state_dict: StateDictType, scalar: float) -> StateDictType:
     """
     Returns the product of a state dict and a scalar.
 
@@ -180,7 +180,9 @@ def state_dict_mul(state_dict: StateDictType, scalar: float):
     return diff
 
 
-def state_dict_div(state_dict: StateDictType, scalar: float, show_pbar: bool = False):
+def state_dict_div(
+    state_dict: StateDictType, scalar: float, show_pbar: bool = False
+) -> StateDictType:
     """
     Returns the division of a state dict by a scalar.
 
@@ -197,16 +199,16 @@ def state_dict_div(state_dict: StateDictType, scalar: float, show_pbar: bool = F
     return diff
 
 
-def state_dict_power(state_dict: Dict[str, Tensor], p: float):
+def state_dict_power(state_dict: StateDictType, p: float) -> StateDictType:
     """
     Returns the power of a state dict.
 
     Args:
-        state_dict (Dict[str, Tensor]): The state dict to be powered.
+        state_dict (StateDictType): The state dict to be powered.
         p (float): The power to raise the state dict to.
 
     Returns:
-        Dict[str, Tensor]: The powered state dict.
+        StateDictType: The powered state dict.
     """
     powered_state_dict = {}
     for key in state_dict:
@@ -215,17 +217,17 @@ def state_dict_power(state_dict: Dict[str, Tensor], p: float):
 
 
 def state_dict_interpolation(
-    state_dicts: List[Dict[str, Tensor]], scalars: List[float]
-):
+    state_dicts: List[StateDictType], scalars: List[float]
+) -> StateDictType:
     """
     Interpolates between a list of state dicts using a list of scalars.
 
     Args:
-        state_dicts (List[Dict[str, Tensor]]): The list of state dicts to interpolate between.
+        state_dicts (List[StateDictType]): The list of state dicts to interpolate between.
        scalars (List[float]): The list of scalars to use for interpolation.
 
     Returns:
-        Dict: The interpolated state dict.
+        StateDictType: The interpolated state dict.
     """
     assert len(state_dicts) == len(
         scalars
@@ -243,15 +245,15 @@ def state_dict_interpolation(
     return interpolated_state_dict
 
 
-def state_dict_sum(state_dicts: List[StateDictType]):
+def state_dict_sum(state_dicts: List[StateDictType]) -> StateDictType:
     """
     Returns the sum of a list of state dicts.
 
     Args:
-        state_dicts (List[Dict[str, Tensor]]): The list of state dicts to sum.
+        state_dicts (List[StateDictType]): The list of state dicts to sum.
 
     Returns:
-        Dict: The sum of the state dicts.
+        StateDictType: The sum of the state dicts.
     """
     assert len(state_dicts) > 0, "The number of state_dicts must be greater than 0"
     assert all(
@@ -267,17 +269,17 @@ def state_dict_sum(state_dicts: List[StateDictType]):
 
 
 def state_dict_weighted_sum(
-    state_dicts: List[Dict[str, Tensor]], weights: List[float], device=None
-):
+    state_dicts: List[StateDictType], weights: List[float], device=None
+) -> StateDictType:
     """
     Returns the weighted sum of a list of state dicts.
 
     Args:
-        state_dicts (List[Dict[str, Tensor]]): The list of state dicts to interpolate between.
+        state_dicts (List[StateDictType]): The list of state dicts to interpolate between.
         weights (List[float]): The list of weights to use for the weighted sum.
 
     Returns:
-        Dict: The weighted sum of the state dicts.
+        StateDictType: The weighted sum of the state dicts.
     """
     assert len(state_dicts) == len(
         weights
@@ -302,7 +304,7 @@ def state_dict_weighted_sum(
     return weighted_sum_state_dict
 
 
-def state_dict_diff_abs(a: StateDictType, b: StateDictType):
+def state_dict_diff_abs(a: StateDictType, b: StateDictType) -> StateDictType:
     """
     Returns the per-layer abs of the difference between two state dicts.
 
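The helpers above now consistently return `StateDictType`. A short sketch of how they compose for a task-arithmetic-style merge, assuming the module path `fusion_bench.utils.state_dict_arithmetic` from the file listing and two checkpoints of the same architecture:

```python
from torch import nn

# Assumed import path, based on the file listing above.
from fusion_bench.utils.state_dict_arithmetic import (
    state_dict_add,
    state_dict_mul,
    state_dict_sub,
)

pretrained = nn.Linear(4, 4)
finetuned = nn.Linear(4, 4)

# Task vector: element-wise difference between fine-tuned and pretrained weights.
task_vector = state_dict_sub(finetuned.state_dict(), pretrained.state_dict())

# Scale the task vector and add it back onto the pretrained weights.
merged = state_dict_add(pretrained.state_dict(), state_dict_mul(task_vector, 0.5))

merged_model = nn.Linear(4, 4)
merged_model.load_state_dict(merged)
```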
{fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/METADATA

@@ -1,30 +1,8 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.20
+Version: 0.2.22
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
-License: MIT License
-
-        Copyright (c) 2024 Anke Tang
-
-        Permission is hereby granted, free of charge, to any person obtaining a copy
-        of this software and associated documentation files (the "Software"), to deal
-        in the Software without restriction, including without limitation the rights
-        to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-        copies of the Software, and to permit persons to whom the Software is
-        furnished to do so, subject to the following conditions:
-
-        The above copyright notice and this permission notice shall be included in all
-        copies or substantial portions of the Software.
-
-        THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-        IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-        FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-        AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-        LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-        OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-        SOFTWARE.
-
 Project-URL: Repository, https://github.com/tanganke/fusion_bench
 Project-URL: Homepage, https://github.com/tanganke/fusion_bench
 Project-URL: Issues, https://github.com/tanganke/fusion_bench/issues
@@ -45,13 +23,17 @@ Requires-Dist: rich
 Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
+Requires-Dist: transformers!=4.49
+Requires-Dist: pillow!=11.2.1
 Provides-Extra: lm-eval-harness
 Requires-Dist: lm-eval; extra == "lm-eval-harness"
+Requires-Dist: immutabledict; extra == "lm-eval-harness"
+Requires-Dist: langdetect; extra == "lm-eval-harness"
 Dynamic: license-file
 
 <div align='center'>
 
-# FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
+# FusionBench: A Comprehensive Benchmark/Toolkit of Deep Model Fusion
 
 [![arXiv](https://img.shields.io/badge/arXiv-2406.03280-b31b1b.svg)](http://arxiv.org/abs/2406.03280)
 [![GitHub License](https://img.shields.io/github/license/tanganke/fusion_bench)](https://github.com/tanganke/fusion_bench/blob/main/LICENSE)
@@ -75,7 +57,7 @@ Projects based on FusionBench and news from the community (descending order of d
 <details>
 <summary>The-Hai Nguyen, Dang Huu-Tien, Takeshi Suzuki, and Le-Minh Nguyen. RegMean++: Enhancing Effectiveness and Generalization of Regression Mean for Model Merging. Aug, 2025. https://www.arxiv.org/abs/2508.03121</summary>
 
-Regression Mean (RegMean), an approach that formulates model merging as a linear regression problem, aims to find the optimal weights for each linear layer in the merge model by minimizing the discrepancy in predictions between the merge and candidate models. RegMean provides a precise closed-form solution for the merging problem; therefore, it offers explainability and computational efficiency. However, RegMean merges each linear layer independently, overlooking how the features and information in the earlier layers propagate through the layers and influence the final prediction in the merge model. In this paper, we introduce RegMean++, a simple yet effective alternative to RegMean, that explicitly incorporates both intra- and cross-layer dependencies between merge models' layers into RegMean's objective. By accounting for these dependencies, RegMean++ better captures the behaviors of the merge model. Extensive experiments demonstrate that RegMean++ consistently outperforms RegMean across diverse settings, including in-domain (ID) and out-of-domain (OOD) generalization, sequential merging, large-scale tasks, and robustness under several types of distribution shifts. Furthermore, RegMean++ achieves competitive or state-of-the-art performance compared to various recent advanced model merging methods.
+Regression Mean (RegMean), an approach that formulates model merging as a linear regression problem, aims to find the optimal weights for each linear layer in the merge model by minimizing the discrepancy in predictions between the merge and candidate models. RegMean provides a precise closed-form solution for the merging problem; therefore, it offers explainability and computational efficiency. However, RegMean merges each linear layer independently, overlooking how the features and information in the earlier layers propagate through the layers and influence the final prediction in the merge model. In this paper, we introduce RegMean++, a simple yet effective alternative to RegMean, that explicitly incorporates both intra- and cross-layer dependencies between merge models' layers into RegMean's objective. By accounting for these dependencies, RegMean++ better captures the behaviors of the merge model. Extensive experiments demonstrate that RegMean++ consistently outperforms RegMean across diverse settings, including in-domain (ID) and out-of-domain (OOD) generalization, sequential merging, large-scale tasks, and robustness under several types of distribution shifts. Furthermore, RegMean++ achieves competitive or state-of-the-art performance compared to various recent advanced model merging methods.
 
 <img width="1000" alt="image" src="docs/algorithms/images/regmean_vs_regmean_plusplus.png">
 </details>
@@ -89,7 +71,7 @@ Model merging has emerged as a promising approach for multi-task learning (MTL),
 <details>
 <summary>Daniel Marczak, et al. No Task Left Behind: Isotropic Model Merging with Common and Task-Specific Subspaces. Feb 2025. https://arxiv.org/abs/2502.04959</summary>
 
-Model merging integrates the weights of multiple task-specific models into a single multi-task model. Despite recent interest in the problem, a significant performance gap between the combined and single-task models remains. In this paper, we investigate the key characteristics of task matrices -- weight update matrices applied to a pre-trained model -- that enable effective merging. We show that alignment between singular components of task-specific and merged matrices strongly correlates with performance improvement over the pre-trained model. Based on this, we propose an isotropic merging framework that flattens the singular value spectrum of task matrices, enhances alignment, and reduces the performance gap. Additionally, we incorporate both common and task-specific subspaces to further improve alignment and performance. Our proposed approach achieves state-of-the-art performance across multiple scenarios, including various sets of tasks and model scales. This work advances the understanding of model merging dynamics, offering an effective methodology to merge models without requiring additional training.
+Model merging integrates the weights of multiple task-specific models into a single multi-task model. Despite recent interest in the problem, a significant performance gap between the combined and single-task models remains. In this paper, we investigate the key characteristics of task matrices -- weight update matrices applied to a pre-trained model -- that enable effective merging. We show that alignment between singular components of task-specific and merged matrices strongly correlates with performance improvement over the pre-trained model. Based on this, we propose an isotropic merging framework that flattens the singular value spectrum of task matrices, enhances alignment, and reduces the performance gap. Additionally, we incorporate both common and task-specific subspaces to further improve alignment and performance. Our proposed approach achieves state-of-the-art performance across multiple scenarios, including various sets of tasks and model scales. This work advances the understanding of model merging dynamics, offering an effective methodology to merge models without requiring additional training.
 </details>
 
 <details>
@@ -107,12 +89,12 @@ Merging multiple expert models offers a promising approach for performing multi-
 <details>
 <summary>Hongling Zheng, Li Shen, Anke Tang, Yong Luo et al. Learn From Model Beyond Fine-Tuning: A Survey. Nature Machine Intelligence. Jan, 2025. https://www.nature.com/articles/s42256-024-00961-0</summary>
 
-> Foundation models (FM) have demonstrated remarkable performance across a wide range of tasks (especially in the fields of natural language processing and computer vision), primarily attributed to their ability to comprehend instructions and access extensive, high-quality data. This not only showcases their current effectiveness but also sets a promising trajectory towards the development of artificial general intelligence. Unfortunately, due to multiple constraints, the raw data of the model used for large model training are often inaccessible, so the use of end-to-end models for downstream tasks has become a new research trend, which we call Learn From Model (LFM) in this article. LFM focuses on the research, modification, and design of FM based on the model interface, so as to better understand the model structure and weights (in a black box environment), and to generalize the model to downstream tasks. The study of LFM techniques can be broadly categorized into five major areas: model tuning, model distillation, model reuse, meta learning and model editing. Each category encompasses a repertoire of methods and strategies that aim to enhance the capabilities and performance of FM. This paper gives a comprehensive review of the current methods based on FM from the perspective of LFM, in order to help readers better understand the current research status and ideas. To conclude, we summarize the survey by highlighting several critical areas for future exploration and addressing open issues that require further attention from the research community. The relevant papers we investigated in this article can be accessed at https://github.com/ruthless-man/Awesome-Learn-from-Model.
+> Foundation models (FM) have demonstrated remarkable performance across a wide range of tasks (especially in the fields of natural language processing and computer vision), primarily attributed to their ability to comprehend instructions and access extensive, high-quality data. This not only showcases their current effectiveness but also sets a promising trajectory towards the development of artificial general intelligence. Unfortunately, due to multiple constraints, the raw data of the model used for large model training are often inaccessible, so the use of end-to-end models for downstream tasks has become a new research trend, which we call Learn From Model (LFM) in this article. LFM focuses on the research, modification, and design of FM based on the model interface, so as to better understand the model structure and weights (in a black box environment), and to generalize the model to downstream tasks. The study of LFM techniques can be broadly categorized into five major areas: model tuning, model distillation, model reuse, meta learning and model editing. Each category encompasses a repertoire of methods and strategies that aim to enhance the capabilities and performance of FM. This paper gives a comprehensive review of the current methods based on FM from the perspective of LFM, in order to help readers better understand the current research status and ideas. To conclude, we summarize the survey by highlighting several critical areas for future exploration and addressing open issues that require further attention from the research community. The relevant papers we investigated in this article can be accessed at <https://github.com/ruthless-man/Awesome-Learn-from-Model>.
 </details>
 
 <details>
 <summary>Li Shen, Anke Tang, Enneng Yang et al. Efficient and Effective Weight-Ensembling Mixture of Experts for Multi-Task Model Merging. Oct, 2024. https://github.com/EnnengYang/Efficient-WEMoE</summary>
-
+
 <img width="1018" alt="image" src="https://github.com/user-attachments/assets/b7e1279e-87fc-4016-8867-1bff7700e271">
 
 </details>
@@ -138,7 +120,7 @@ Install from PyPI:
 pip install fusion-bench
 ```
 
-or install the latest version in development from github repository
+or install the latest version in development from the GitHub repository
 
 ```bash
 git clone https://github.com/tanganke/fusion_bench.git
@@ -155,7 +137,6 @@ pip install -e . # install the package in editable mode
 
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10256836.svg)](https://doi.org/10.5281/zenodo.10256836)
 
-
 ```bash
 pip install "fusion-bench[lm-eval-harness]"
 ```
@@ -205,8 +186,8 @@ The project is structured as follows:
 
 ## A Unified Command Line Interface
 
-The `fusion_bench` command-line interface is a powerful tool for researchers and practitioners in the field of model fusion. It provides a streamlined way to experiment with various fusion algorithms, model combinations, and evaluation tasks.
-By leveraging Hydra's configuration management, fusion_bench offers flexibility in setting up experiments and reproducibility in results.
+The `fusion_bench` command-line interface is a powerful tool for researchers and practitioners in the field of model fusion. It provides a streamlined way to experiment with various fusion algorithms, model combinations, and evaluation tasks.
+By leveraging Hydra's configuration management, fusion_bench offers flexibility in setting up experiments and reproducibility in results.
 The CLI's design allows for easy extension to new fusion methods, model types, and tasks, making it a versatile platform for advancing research in model fusion techniques.
 
 Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_bench/) for more information.
@@ -245,7 +226,7 @@ class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
     )
 ```
 
-A corresponding configuration file should be created to specify the class and hyperparameters of the algorithm.
+A corresponding configuration file should be created to specify the class and hyperparameters of the algorithm.
 Here we assume the configuration file is placed at `config/method/your_algorithm_config.yaml`.
 
 > [!NOTE]
@@ -280,7 +261,7 @@ Click on [<kbd>Use this template</kbd>](https://github.com/fusion-bench/fusion-b
 
 ### FusionBench Command Generator WebUI (for v0.1.x)
 
-FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
+FusionBench Command Generator is a user-friendly web interface for generating FusionBench commands based on configuration files.
 It provides an interactive way to select and customize FusionBench configurations, making it easier to run experiments with different settings.
 [Read more here](https://tanganke.github.io/fusion_bench/cli/fusion_bench_webui/).
 
@@ -291,18 +272,14 @@ It provides an interactive way to select and customize FusionBench configuration
 If you find this benchmark useful, please consider citing our work:
 
 ```bibtex
-@misc{tangFusionBenchComprehensiveBenchmark2024,
-  title = {{{FusionBench}}: {{A Comprehensive Benchmark}} of {{Deep Model Fusion}}},
-  shorttitle = {{{FusionBench}}},
-  author = {Tang, Anke and Shen, Li and Luo, Yong and Hu, Han and Du, Bo and Tao, Dacheng},
-  year = {2024},
-  month = jun,
-  number = {arXiv:2406.03280},
-  eprint = {2406.03280},
-  publisher = {arXiv},
-  url = {http://arxiv.org/abs/2406.03280},
-  archiveprefix = {arxiv},
-  langid = {english},
-  keywords = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Machine Learning}
+@article{tang2024fusionbench,
+  title={Fusionbench: A comprehensive benchmark of deep model fusion},
+  author={Tang, Anke and Shen, Li and Luo, Yong and Hu, Han and Du, Bo and Tao, Dacheng},
+  journal={arXiv preprint arXiv:2406.03280},
+  year={2024}
 }
 ```
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=tanganke/fusion_bench&type=Date)](https://www.star-history.com/#tanganke/fusion_bench&Date)