PyPI - fusion-bench - Versions diffs - 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl - Mend

fusion-bench 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

fusion_bench/utils/devices.py CHANGED Viewed

@@ -39,7 +39,12 @@ def clear_cuda_cache():
         log.warning("CUDA is not available. No cache to clear.")
-def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
+def to_device(
+    obj: T,
+    device: Optional[torch.device],
+    copy_on_move: bool = False,
+    **kwargs: Any,
+) -> T:
     """
     Move a given object to the specified device.
@@ -49,12 +54,20 @@ def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
     Args:
         obj: The object to be moved to the device. This can be a torch.Tensor, torch.nn.Module, list, tuple, or dict.
         device (torch.device): The target device to move the object to. This can be `None`.
-        **kwargs: Additional keyword arguments to be passed to the `to` method of torch.Tensor or torch.nn.Module. For example, `non_blocking=True`, `dtype=torch.float16`.
+        copy_on_move (bool, optional): Whether to force a copy operation when moving tensors to a different device.
+            If True, tensors will be copied when moved to a different device (copy=True is passed to tensor.to()).
+            If False (default), tensors are moved without forcing a copy operation, allowing PyTorch to optimize
+            the operation. This parameter only affects torch.Tensor objects; modules and other types are unaffected.
+            Defaults to False.
+        **kwargs: Additional keyword arguments to be passed to the `to` method of torch.Tensor or torch.nn.Module.
+            For example, `non_blocking=True`, `dtype=torch.float16`. Note that if `copy_on_move=True`, the `copy`
+            keyword argument will be automatically set and should not be provided manually.
     Returns:
         The object moved to the specified device. The type of the returned object matches the type of the input object.
     Examples:
+        ```python
         >>> tensor = torch.tensor([1, 2, 3])
         >>> to_device(tensor, torch.device('cuda'))
         tensor([1, 2, 3], device='cuda:0')
@@ -66,17 +79,26 @@ def to_device(obj: T, device: Optional[torch.device], **kwargs: Any) -> T:
         >>> data = [torch.tensor([1, 2]), torch.tensor([3, 4])]
         >>> to_device(data, torch.device('cuda'))
         [tensor([1, 2], device='cuda:0'), tensor([3, 4], device='cuda:0')]
+        >>> # Force copy when moving to different device
+        >>> tensor = torch.tensor([1, 2, 3], device='cpu')
+        >>> copied_tensor = to_device(tensor, torch.device('cuda'), copy_on_move=True)
+        >>> # tensor and copied_tensor will have different memory locations
+        ```
     """
-    if isinstance(obj, (torch.Tensor, torch.nn.Module)):
+    if isinstance(obj, torch.Tensor):
+        if copy_on_move:
+            if obj.device != torch.device(device):
+                kwargs["copy"] = True
+        return obj.to(device, **kwargs)
+    elif isinstance(obj, torch.nn.Module):
         return obj.to(device, **kwargs)
     elif isinstance(obj, list):
-        return [to_device(o, device) for o in obj]
+        return [to_device(o, device, **kwargs) for o in obj]
     elif isinstance(obj, tuple):
-        return tuple(to_device(o, device) for o in obj)
+        return tuple(to_device(o, device, **kwargs) for o in obj)
     elif isinstance(obj, dict):
-        for key in obj:
-            obj[key] = to_device(obj[key], device)
-        return obj
+        return {key: to_device(value, device, **kwargs) for key, value in obj.items()}
     else:
         # the default behavior is to return the object as is
         return obj

fusion_bench/utils/lazy_state_dict.py CHANGED Viewed

@@ -76,6 +76,9 @@ class LazyStateDict(Mapping[str, torch.Tensor], Generic[TorchModelType]):
     _index: Optional[Dict[str, str]]
     """Mapping of parameter names to checkpoint files."""
+    meta_module: TorchModelType = None
+    meta_module_class: Optional[Type[TorchModelType]] = None
     def __init__(
         self,
         checkpoint: str,

fusion_bench/utils/rich_utils.py CHANGED Viewed

@@ -188,17 +188,21 @@ if __name__ == "__main__":
     display_available_styles()
-def setup_colorlogging(force=False, **config_kwargs):
+def setup_colorlogging(
+    force=False,
+    level=logging.INFO,
+    **kwargs,
+):
     """
     Sets up color logging for the application.
     """
     FORMAT = "%(message)s"
     logging.basicConfig(
-        level=logging.INFO,
+        level=level,
         format=FORMAT,
         datefmt="[%X]",
         handlers=[RichHandler()],
         force=force,
-        **config_kwargs,
+        **kwargs,
     )

{fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.23
+Version: 0.2.24
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 Project-URL: Repository, https://github.com/tanganke/fusion_bench
@@ -23,12 +23,19 @@ Requires-Dist: rich
 Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
+Requires-Dist: joblib
+Requires-Dist: bidict
 Requires-Dist: transformers!=4.49
 Requires-Dist: pillow!=11.2.1
 Provides-Extra: lm-eval-harness
 Requires-Dist: lm-eval; extra == "lm-eval-harness"
 Requires-Dist: immutabledict; extra == "lm-eval-harness"
 Requires-Dist: langdetect; extra == "lm-eval-harness"
+Requires-Dist: rich-run; extra == "lm-eval-harness"
+Provides-Extra: docs
+Requires-Dist: mkdocs; extra == "docs"
+Requires-Dist: mkdocs-material; extra == "docs"
+Requires-Dist: mkdocstrings[python]; extra == "docs"
 Dynamic: license-file
 <div align='center'>
@@ -151,7 +158,7 @@ This will install the latest version of fusion-bench and the dependencies requir
 Documentation for using LM-Eval Harness within FusionBench framework can be found at [this online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
 > [!TIP]
-> Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/causal_lm) or in the [`docs/modelpool/causal_lm.md`](docs/modelpool/causal_lm.md) markdown file.
+> Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/llm) or in the [`docs/modelpool/llm/index.md`](docs/modelpool/llm/index.md) markdown file.
 ## Introduction to Deep Model Fusion
@@ -179,7 +186,7 @@ The project is structured as follows:
   - `taskpool`: configuration files for the task pool.
   - `model`: configuration files for the models.
   - `dataset`: configuration files for the datasets.
-- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -r mkdocs-requirements.txt`.
+- `docs/`: documentation for the benchmark. We use [mkdocs](https://www.mkdocs.org/) to generate the documentation. Start the documentation server locally with `mkdocs serve`. The required packages can be installed with `pip install -e ".[docs]"`.
 - `examples/`: example scripts for running some of the experiments.
   > **naming convention**: `examples/{method_name}/` contains the files such as bash scripts and jupyter notebooks for the specific method.
 - `tests/`: unit tests for the benchmark.

{fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/RECORD RENAMED Viewed

@@ -48,12 +48,12 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
 fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
 fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
 fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/method/__init__.py,sha256=MDYyNjJufoOe_iwmlL2ftWoD-72ReVv00mege5MQ6fc,8685
+fusion_bench/method/__init__.py,sha256=-d5WMlvY3kHYSUeompoG71T6fSttXPDjPf6X4TxNkqY,8986
 fusion_bench/method/base_algorithm.py,sha256=OnKSNPQ_nIdIWxryyblW_sko7uoEBN4lGh-eLkJ4kh4,9004
 fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
-fusion_bench/method/ensemble.py,sha256=oGiTJUderoPP0Opd7nHwC6h3VBmGTQ5inuG3wb6F4-A,3097
+fusion_bench/method/ensemble.py,sha256=Bjzqxt-tUp5cawT1jIhqKswN5QH3bkYbmuI4LS4uTG0,3619
 fusion_bench/method/model_recombination.py,sha256=b2ku5wCrWd1QSZscIra4KlhLDxt04JjU30ItMNvpZ6g,5268
-fusion_bench/method/simple_average.py,sha256=fLd14_0218JKyXmwe5M6kgumfD60u2ZVnm3B7PBX-Uc,5508
+fusion_bench/method/simple_average.py,sha256=FuIwHCUNK5CoToBzVt-lo8SK7wjj8CdRpiNLRnAflH4,5519
 fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
 fusion_bench/method/ada_svd/clip_vision.py,sha256=XvXgIdlShAREMsubRgphyycGrhWqSnuVBo6S9bNYSd0,12581
 fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
@@ -128,12 +128,13 @@ fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4R
 fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
 fusion_bench/method/knots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/method/knots/knots_utils.py,sha256=NWlzo4nhQypUcNknU832MP3QT42VsLx-6WQ9QXuSigw,795
-fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
+fusion_bench/method/linear/__init__.py,sha256=0U7JqlX8JuMECKlvLNM16Lxc1lCBN2bVqH8FtNoD-Fw,417
 fusion_bench/method/linear/expo.py,sha256=N7XnBTC0Nz_4gRs1f9TL9g-j-Lku5TF0lAjGKhZHwOw,3990
 fusion_bench/method/linear/linear_interpolation.py,sha256=Y01HPMBb7TaCjEBsbC6gqQyHvY1SRpwPyPPLxvYrL0s,2223
 fusion_bench/method/linear/llama_expo.py,sha256=ccECjhAqcFmzOIDyZ7e_aPzTM2Kj8u2D8TJytyz18YM,8476
-fusion_bench/method/linear/simple_average_for_llama.py,sha256=5psacdQiqtUK_lwYZcXp9kgIU3MFGk6G1JatxeMUjE8,3339
-fusion_bench/method/linear/task_arithmetic_for_llama.py,sha256=4SZpiTD7OzhWUXtcdK3PYdXbBGyDqiZd7oZOQ0lraN0,1963
+fusion_bench/method/linear/simple_average_for_causallm.py,sha256=qc-JiPLu19442DcP0xCl4EDGzVnIbq3WGiAiWkNzv6E,3448
+fusion_bench/method/linear/task_arithmetic_for_causallm.py,sha256=7cewnrjX47omokAdhNvDIQV8zz06_ZNKPWM7CZx30R0,2247
+fusion_bench/method/linear/ties_merging_for_causallm.py,sha256=yi0RCC6eRwXMKUC_cBdFLvejia4nmjPh9Pd0MpaUrVg,2392
 fusion_bench/method/lm_finetune/__init__.py,sha256=IFGAqXujX3Fabzl_tC6zZyOyPFJfVziL0qFtj5MVxj0,149
 fusion_bench/method/lm_finetune/bradley_terry_rm.py,sha256=1nvjOMABuEISyYaTRrFiwHLWvSTgHT8pEzTYBTLBRUg,18779
 fusion_bench/method/lm_finetune/causal_lm_pretrain.py,sha256=4CL9KGFsUzrt-edMfTooo4G4apzTH_57rso3DGGvKL0,219
@@ -222,7 +223,7 @@ fusion_bench/method/tall_mask/__init__.py,sha256=XINPP8PqGQ01he9p2RyHaKGyrcYoJuY
 fusion_bench/method/tall_mask/task_arithmetic.py,sha256=c-5ehKV_t46ljvKTBDr-eA3-FbSD_UNXlza4cOqK5aI,4371
 fusion_bench/method/tall_mask/utils.py,sha256=Wlp8WcPwR_lCaBIZ9rgG6ewLfSzz3G7kPk9yj13pvls,8817
 fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
-fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=k4p8ADupDR5nZGHZjNgNsO8I_8rzqVyAr6Tejh85V0A,5525
+fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=KsSBshf04MUwIjoc0HAAmY6cWMqjZwZOYXbUuU4EaL0,6320
 fusion_bench/method/task_singular_vector/TSVC.py,sha256=yn4SrZNvtA6PoGYJmbmtNeDyDbGnRCgfZ7ZCg914AZU,410
 fusion_bench/method/task_singular_vector/TSVM.py,sha256=Sdgoi8xT0Hl19pmGdIuUS3D1DsVqSVD-Hipp-Sj_HoA,13652
 fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
@@ -231,7 +232,7 @@ fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=WGM8wCICdGsN
 fusion_bench/method/task_singular_vector/utils/__init__.py,sha256=Mep62TnXJscBEFZ6QDsI28cWmfygt8EPwjQdfUJzEZQ,315
 fusion_bench/method/task_singular_vector/utils/task_singular_interference.py,sha256=tXsFwx8eomzu00nSp95CjjWZX82zq32ff2Q6VM_29CM,1348
 fusion_bench/method/ties_merging/__init__.py,sha256=9u9teBbdILbupr9jbwk-qCXSzssCssC5FUV2BfpyZM4,67
-fusion_bench/method/ties_merging/ties_merging.py,sha256=eCpGa9F4VoT0zsl7XKK7WsKz45tu_DkFHeffyJospJc,5152
+fusion_bench/method/ties_merging/ties_merging.py,sha256=u2o7Wo2SJJsxxhBeAhsmY7k4bdZkUtwAwGePGI4Sggc,5916
 fusion_bench/method/ties_merging/ties_merging_utils.py,sha256=EZyltS9hUM8NmcvXjAqhBpj-ucMlMtR95082kPDsJPU,10296
 fusion_bench/method/trust_region/__init__.py,sha256=4ao0E-jTlmTQPArbFWD_dFn_4yve3urNIuSMT8JtRIM,91
 fusion_bench/method/trust_region/clip_task_arithmetic.py,sha256=SWP7sRMiXzkDZ3KdNht3zqjaTcAtB4wpnnd8KYbcKZI,7441
@@ -245,6 +246,8 @@ fusion_bench/method/we_moe/we_moe.py,sha256=_QtmD04oFh7aLhmPq8EYchYB7BIN9ZFWOeys
 fusion_bench/method/weighted_average/__init__.py,sha256=bLxIuuB72hH05J_Spz4MZbiLpYL39iwgVIQa_QeQpIk,118
 fusion_bench/method/weighted_average/llama.py,sha256=vvxXp8v98kvXfHi7fYupnIrOVoA3tp08lmV2jDri_BY,3731
 fusion_bench/method/weighted_average/weighted_average.py,sha256=E4byEA2VfXozu7S_gnYVvwI3qg8AFWaSeNRHGbs2Tno,3340
+fusion_bench/method/wudi/__init__.py,sha256=08qPzOlhjw-Ab8TwyY9MGOGx_TLrUTueJc1WgRIvuxU,44
+fusion_bench/method/wudi/wudi.py,sha256=HL3Y0MPjozp7NML_UNjIWWPbQDQxYH_WG_BuyripeBQ,3602
 fusion_bench/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/metrics/continual_learning/__init__.py,sha256=f-mkv4SpXTq5kiQVHbe2g0IPf4yLFgu1Dw7g2DOK6T4,57
 fusion_bench/metrics/continual_learning/backward_transfer.py,sha256=LCMWFFmBgWv7UIAJqiTaSvVvanx4qjnXIGuCMYvzmtc,559
@@ -262,10 +265,10 @@ fusion_bench/mixins/__init__.py,sha256=yjRvcB9Mn-c0g8tXmoBf2Dn8gyc-Na6dyhc4r674a
 fusion_bench/mixins/clip_classification.py,sha256=8dqJuI3AVetFZKuzTp1SR2kGQ-vGvfbcmwfnzuUiwfI,10096
 fusion_bench/mixins/fabric_training.py,sha256=ZmycEhCaNCgVi5oM9m0q6msxgk3quowmFvDAcvskFrg,13017
 fusion_bench/mixins/hydra_config.py,sha256=rfT-XPUKV_U3nvuTVsKLmSmEiieoSIsbhxE5_-E0er0,5508
-fusion_bench/mixins/lightning_fabric.py,sha256=ns9H_dkSDD8jJ7GL4YcAypewUcy9mzbX3Xy0bBcyGVY,7403
+fusion_bench/mixins/lightning_fabric.py,sha256=5iamAL7YV6lEm_-8NuzFjfIy1vslwKthSpCSWLLhlCM,7506
 fusion_bench/mixins/openclip_classification.py,sha256=O45HzgLXNvlQr5RVpfIGsYdIQ0tY5g_68KB0MTqsZWU,290
 fusion_bench/mixins/rich_live.py,sha256=j7wNgrgwfdpl6nCXZGF_2DLtNq2aqCb_52Qhe9QSltc,495
-fusion_bench/mixins/serialization.py,sha256=A2zEe3RIUhj60S8ENvjdMORz9zJ0bRnrAD54x1XIvao,15117
+fusion_bench/mixins/serialization.py,sha256=z73Mmq952TIdPwwZ8cRdl3n0_uc9lqylFI9fxKesREs,13260
 fusion_bench/mixins/simple_profiler.py,sha256=czWMl6p9PoxbQ5A8Uifwleaq5QPGEn0qMc8MXu9dSZM,2200
 fusion_bench/mixins/optim/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/mixins/optim/adamw_with_warmup.py,sha256=qTnRl8GVVIfaplOFBHnJFuZUbxPZRWRGHGNzm_EDhDE,1421
@@ -277,7 +280,7 @@ fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RP
 fusion_bench/modelpool/lazy_state_dict_pool.py,sha256=HtEA85rqSCHfsIddI5sKDcZf5kSuHNwrb8fF1TUSTr0,652
 fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
 fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
-fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=dSmjAhL4AxD34ckCdE8Rnf1hN5opoPIuz-hducQeK38,18685
+fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=gpUQqxZIuKoaQ-gvdPsLVxI7UifueR6k3YzbUV1i0lk,19902
 fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
 fusion_bench/modelpool/clip_vision/modelpool.py,sha256=e5t9olRMOj_SyGVy-gqn7RwC5FAqxNsJDongWIv2KFY,7108
 fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
@@ -288,8 +291,8 @@ fusion_bench/modelpool/seq_classification_lm/__init__.py,sha256=_VB9nlR_gm6IEXNM
 fusion_bench/modelpool/seq_classification_lm/reward_model.py,sha256=NKf-eoei1GdU4ojKSpN5_kQwax4uUEStnlKyh8qOrNg,540
 fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py,sha256=t9wXHFwa7V2XC3ajxt4_bSsxMTDKW4nebvdxhG7VeLM,3435
 fusion_bench/models/__init__.py,sha256=LeLQw2Yphu4QKZxjws_7MCM50XvFP1rTrvJ_2SR5zIA,271
-fusion_bench/models/hf_clip.py,sha256=056UHeSjKKDYXg-o7CC2zsx4fC9R6IBkPGI8IFhWTNw,7291
-fusion_bench/models/hf_utils.py,sha256=ozS56t69BOGy_wvbjX6MKFUuGsfKqy6s_TsinldNetk,5435
+fusion_bench/models/hf_clip.py,sha256=lL4LxbdwC_rDWRozdEJmRlzKaNcQMpWwCSMDE0tfZRM,7525
+fusion_bench/models/hf_utils.py,sha256=bfB3QAUqsG-TyUeOWrZt8V7GeWDhp-fKg3P0J3D_TbQ,5497
 fusion_bench/models/parameter_dict.py,sha256=HCkTJCz23pYN1_Hhegx8gglOtrnzVKJPMeg9_rUhe18,3630
 fusion_bench/models/rankone_moe.py,sha256=aY8IDM-ct7qKYH8ukBUsa_VDkDgGNtCqyNtNKlDTUTc,12046
 fusion_bench/models/separate_io.py,sha256=5AJlCxkHdVVffITnIRlF3ZIaKLRWDhJESVQN1lX-ZhU,3835
@@ -318,7 +321,7 @@ fusion_bench/models/llama/model_utils/mod.py,sha256=xzNOgTRfOK9q8kml4Q2nmSOl23f3
 fusion_bench/models/llama/model_utils/visual.py,sha256=wpqWqEASyA7WhJLCfC26h0Cdn5CXnwC1qPJUlSXggo4,8310
 fusion_bench/models/masks/__init__.py,sha256=vXG6jrBkDbPsnrX6nMEYAW1rQuGEWDgdjID7cKzXvrs,69
 fusion_bench/models/masks/mask_model.py,sha256=YXNZ_CGp6VPshZH__Znh6Z07BqOK53G-Ltc1LVy1E3I,5502
-fusion_bench/models/model_card_templates/default.md,sha256=Abd8tUhdZU-B5jwc7N6Gm0zLGNkfx6fr7MAL03VtFDg,885
+fusion_bench/models/model_card_templates/default.md,sha256=DJXwDODCsqIOhkgP57-iCShxLYK_jnsDsJYH1GfbBY8,1028
 fusion_bench/models/modeling_deepseek_v2/__init__.py,sha256=trXrhtKb_gIxXVo7wSZ-il5sLJtDTiNZezRrEt3M8zM,505
 fusion_bench/models/modeling_deepseek_v2/configuration_deepseek.py,sha256=TblFOCfNwaXUnXnD-sxFhSn5Df-_yy2LMcrth-sBPFI,10301
 fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py,sha256=PtfkfPrfmQVoLiVhgqlp5toJAnCinPWfeZYeJJtWWBs,78676
@@ -362,7 +365,7 @@ fusion_bench/models/smile_moe/utils/svd_utils.py,sha256=A2u7lH5Bo2qhgwplHPAz56pd
 fusion_bench/models/surgery/__init__.py,sha256=tcUSi2m9GzGWfvRDQScIbdEbFBS_35gm9zGKN7VpE70,53
 fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNkEiPwNrN11ydyQQ1EZHehM,5133
 fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/models/wrappers/ensemble.py,sha256=MQ92yxI_D8AzzA8sbpZE-rp-vWxO0tTICFnF8Y1Gyss,6380
+fusion_bench/models/wrappers/ensemble.py,sha256=T-DAKrAm-ciZwV6Hbt8uASbjtoQpHTlvVyan3rhk_8k,11632
 fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=A7LjG0inL5oeEVOkJwEUDM15v4dpQnsCq2y9zA78R3k,11198
 fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py,sha256=q5Hc4BtLpAawMbxsWJRL-8OR-x7994Jhr9IyN7vKZ9o,16930
 fusion_bench/models/wrappers/task_wise_fusion.py,sha256=ROLANdDq0bZ3sIROqIv3udPN8lzDdEwxD0Jonx-5ycw,17465
@@ -377,7 +380,7 @@ fusion_bench/programs/__init__.py,sha256=oGoRp2TMI6ELxyfkeTg2h27hZJEDz9x31Asmvwv
 fusion_bench/programs/base_program.py,sha256=Bl_bv8SawEUc-GBTtZFMoii0y-r-0hOXBAJkQFexWCU,3475
 fusion_bench/programs/fabric_fusion_program.py,sha256=jt0_tlg37a2jBl2YikaC0N71Gmr4J340wkKAekyT180,12453
 fusion_bench/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/scripts/cli.py,sha256=VwcwqY--kGDEGI1RoTQ5X32FaKducdRUKf2CZRXcfCM,2739
+fusion_bench/scripts/cli.py,sha256=kEWLEkZEBqUr1_-XTePzNC5NM8lwWvgUBf0Lcuk_FI8,2739
 fusion_bench/scripts/imgui.py,sha256=r9Glbfbwu3JCsX9TKQFwcHarvwA_G7ff0jWBUPW1S1U,7613
 fusion_bench/scripts/nyuv2_mtl_train.py,sha256=W1C45R9NdF4O-UjCx1bUxRTdFE0-FlRpwJHZ5gY18rI,3602
 fusion_bench/scripts/webui.py,sha256=ryA-2leSnHcYA88tTAYzJGDhiljbi0vl1Fibejzndlw,14398
@@ -392,7 +395,7 @@ fusion_bench/taskpool/clip_vision/__init__.py,sha256=ItdyWYy2A5xQKzh1dXi9kbQTBig
 fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py,sha256=t_lmo8W-ZgLLOiBnF5CWfaLbKwz3EXfO8gCavI34qQY,3733
 fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py,sha256=UdI7npI53LjPV2B19tHymhbma6WYcZIvzhqaSyZKkSQ,4762
 fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py,sha256=8lZIG6tWpctYzme0Q_n6QcGnn9MeDmP3UX8nEv4_a9Q,4232
-fusion_bench/taskpool/clip_vision/taskpool.py,sha256=3JPN_1B9ylG0-Q69UELdQgakrgxRRQbj9x6LvTlw_J0,16177
+fusion_bench/taskpool/clip_vision/taskpool.py,sha256=99F8w_e4-UnoeDkSjo0z_8Wstx6e635h0IqSdtfT7ms,16460
 fusion_bench/taskpool/clip_vision/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py,sha256=LY9wxWCm_4X7Ii0ZkMxhtbevz6OxS3Bkqz0puXhuRqM,2393
 fusion_bench/taskpool/llama/__init__.py,sha256=iB4ESMgnsl0m-z0YtRdPZiwGGv96-86R8pbSnkdet8Q,57
@@ -442,7 +445,7 @@ fusion_bench/utils/__init__.py,sha256=wNAfpP-u_-8HGbLaBoHT_wriU_cNvY4M_UXdBv2kXh
 fusion_bench/utils/auto.py,sha256=uACQLE62_kNyhl4BGduvcbyeTE61qXpIJx3Ccl8kh68,920
 fusion_bench/utils/cache_utils.py,sha256=-bTZijQgl4BuAx0VSJFD-bSDOXuq3o0NkrOaiLiyofU,4795
 fusion_bench/utils/data.py,sha256=aalB3kGbZUF-PZ_IaAhcXanRKhS-RNMT5mUrEBb4R3E,6722
-fusion_bench/utils/devices.py,sha256=i5g2FzFs-UWhekcwzxVUZBOw82pOP-RbjIISbfWnuoM,8357
+fusion_bench/utils/devices.py,sha256=6AkGcs3flt0FSo9yfEREuehoTrgcc65gkwpTWQy8XsI,9546
 fusion_bench/utils/dict.py,sha256=ZCK0CRRT_B1Z18WY_GOYcmth7k5x9Jn1k7XhAVWRu98,1379
 fusion_bench/utils/dtype.py,sha256=z6UlPGF9dzG4Ik8rXGf59PJk_RKzG6Trp8O6wcBS9PU,4360
 fusion_bench/utils/expr.py,sha256=zwHNrtIbOMnIChU-0ZI5qLbDva8zvHbizL-4F2TwM14,2386
@@ -452,14 +455,14 @@ fusion_bench/utils/hydra_utils.py,sha256=TklUDKDEZlg4keI-TEZiqh4gFjr9-61Rt1RMlqk
 fusion_bench/utils/instantiate_utils.py,sha256=OXkfhq_o3Sgy5n3Psf-HI-dIfbK9oD2GBdfcx3gT63Q,17526
 fusion_bench/utils/json.py,sha256=sVCqbm9mmyHybiui-O57KFt_ULrjLtN2wipSo6VDvqE,2533
 fusion_bench/utils/lazy_imports.py,sha256=dg4Uu8FaoEu0WGVTo5o_PbLZs3Ei_RG75Ta-Us1iPW4,3500
-fusion_bench/utils/lazy_state_dict.py,sha256=9dse7U3QZNvNxBINb02Q9DW2_-voUh2Ri2B6hk9wvNI,20227
+fusion_bench/utils/lazy_state_dict.py,sha256=srEKyctbuBW3yrVFSG7Tki_XkBwoc6eUmDXLxHXqX0o,20328
 fusion_bench/utils/misc.py,sha256=93q0m-HYWkPK91Co5lll_J0Dxs6YahW2lD_X8fUAyTk,2420
 fusion_bench/utils/modelscope.py,sha256=P8fV6Eff8oP0LVGIFGbLvuk8MBteysN438djZ6ZEfE4,10699
 fusion_bench/utils/packages.py,sha256=wKl-qtPjA61LrdgTTusuNyvs8jfUv4mA5IwPTFWyYtA,2139
 fusion_bench/utils/parameters.py,sha256=ufEDOYJwcQQxLfveK8hBAGwpu5J3LA_cTWiDgZ2zkJ0,11788
 fusion_bench/utils/path.py,sha256=qrfgar3b-6_2v032-2hTt97L6qdtG7zc3CFrGFyKSGE,2400
 fusion_bench/utils/pylogger.py,sha256=r2KXTvq-j8uHdjBBoVPOgkjv4c6pyhbX6xf1JbOsF4w,3335
-fusion_bench/utils/rich_utils.py,sha256=XNPUpa1grna_C0MLQs0nY25-Kfutpj9BOEzvjoH7nR0,5849
+fusion_bench/utils/rich_utils.py,sha256=24RF-OHK6h9ggZ95csw_vMU8YtxYNOxlzjcH7dpuESY,5863
 fusion_bench/utils/set.py,sha256=_43ZvGKJ_BK9sUslsSNhi7xEfuAQuyj3vViImnGpnCY,134
 fusion_bench/utils/state_dict_arithmetic.py,sha256=fczHDEpL2_UmxNIdvQtllXvBWBcmKpw-p6CIS_upjwI,11818
 fusion_bench/utils/tensorboard.py,sha256=9fkgNYR9LM38nPNkudcxL9TjLUseW-280M0k2nLff7o,1669
@@ -472,7 +475,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
 fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
 fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
 fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
-fusion_bench-0.2.23.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
+fusion_bench-0.2.24.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
 fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
 fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
 fusion_bench_config/fabric_model_fusion.yaml,sha256=U8BxsaOvsg9bsEZcIpBE-feo9n9G7Y1kQDHqPVxUYAg,2601
@@ -621,7 +624,7 @@ fusion_bench_config/method/dare/ties_merging.yaml,sha256=7gDW4XpezrsccsbJGqqKrbX
 fusion_bench_config/method/dawe/dawe_for_clip.yaml,sha256=99P5xpp1YGvIwXGxDcxRtJMLE2FhvEFmFBQjOMEcGoc,1023
 fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=CtZI3YPMJNDy225yhOJbSiMKlsc-X5nCFzmVh0dvr-w,78
 fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=khdpCvKMNytx4nZSgtUJFXv44MVytXu0aqUVd9TixXo,57
-fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=Ih9dqifpnvxW2QfJqp8Q8S8W1k7VZG9ulyPxkcuaWsw,54
+fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=RKa3IgN3DfFZVmeXVIdTt0NdPVV0jFkpQz6SxLs3Kso,124
 fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=2KD3PjFglqL7fjqhjXtOWxZ1mvmYodiNVroXsFd7EGE,261
 fusion_bench_config/method/expert_sparsity/README.md,sha256=CLE0-XblXDWCUTHPaTNtBH-YquXn-uawwTJiYrgjMaA,239
 fusion_bench_config/method/expert_sparsity/mixtral.yaml,sha256=maFL3LM0zfnQ1eXoNXUslSjgZmpOdUJgl_a31dYUBbc,605
@@ -638,8 +641,9 @@ fusion_bench_config/method/linear/expo.yaml,sha256=St3NW6cKVRV3vCn8y0gxQ8k66VTdt
 fusion_bench_config/method/linear/linear_interpolation.yaml,sha256=chM6_HRKKcMleTeuKY3-YNI1qaMG2CfnsRwUxAlHsRw,66
 fusion_bench_config/method/linear/llama_expo.yaml,sha256=SvqamjT06BMObQ58sks5x7Wv6kGpp3-Nlw3ihbD_kSA,621
 fusion_bench_config/method/linear/llama_expo_with_dare.yaml,sha256=Pp8s2xmEg5XSvaGKtwTYx_PzcGvwRh2gPpZ6u9as4_E,383
-fusion_bench_config/method/linear/simple_average_for_llama.yaml,sha256=r2Zul2GaMEEQ7NEDf8yiAgEiMDPNibU4qsJ0toD2KjQ,319
-fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml,sha256=N7cyHm6a2QwNsV9uaJp-eZmdbs9kmdRrkxtO58QQQgM,116
+fusion_bench_config/method/linear/simple_average_for_causallm.yaml,sha256=qqeIr61PJEcfZclZ5vV64GCzyt-8b1zB0FDZu8DsbXQ,322
+fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml,sha256=tJA0n0_XVvll4rZYVHQVqFCz8W3Bey6NjPKMIH3-P0U,142
+fusion_bench_config/method/linear/ties_merging_for_causallm.yaml,sha256=1oEIdxV0OqWjDQ9V_lmXEPUayp4KbKHE2SvpCLmiKOU,489
 fusion_bench_config/method/linear/weighted_average.yaml,sha256=uq2gHGCwVHHSa1H-hzcrSlumUTLJ50tfyiY1Mh1pFsk,186
 fusion_bench_config/method/linear/weighted_average_for_llama.yaml,sha256=se2aq6t5R1f-ZG6ubUyRr__DBe9BzXrgL81ua3DkQoM,498
 fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml,sha256=QHsRfJK9K4KajsX3LBHG8cDt7ZLJWxOBnJjpHRQSB_s,1348
@@ -686,6 +690,7 @@ fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=-Ipc05T
 fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml,sha256=KIKUr_Q4e9pJSVlqUFatuLp5vg8kNEsn8tOE4R77sxA,653
 fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=mMVaFJWUZmIdhg0kVQY20i7cmgTMrOSgoSpmW7quRzc,993
 fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
+fusion_bench_config/method/wudi/wudi.yaml,sha256=3mJ6-XKHwwHALS3d503ybGM7pc1PhEK91YwwMybuzMc,76
 fusion_bench_config/model/clip-vit/README.md,sha256=-s34C9X7pxy55xSc24kbf-4ctK7UC-Wpu_JWIe9O0Ko,1382
 fusion_bench_config/model/clip-vit/clip-vit-base-patch16.yaml,sha256=Fn7or7-5fVZNyp6fH1lkwk7mq7iVhpR3sMt6Sm7Yg6I,43
 fusion_bench_config/model/clip-vit/clip-vit-base-patch16_TALL14.yaml,sha256=8G2OCCDaSJkzDOMDsV08NE-Z5YWMjDsFVs1WY3OWNss,787
@@ -841,9 +846,11 @@ fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL14_
 fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml,sha256=FuPWQbC9xEV5wZjuo835gOMNgbzmpK9RbjFjA_HOzqo,2476
 fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml,sha256=9PCkbrNnQSKTsm4eoUvVgjGd3IY7wHBC4LWj4kOdY4Y,1406
 fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml,sha256=bqnyzgwIvDtV3Fb-uLf9mdFv0NW1C392lxGsGUPLsKE,400
-fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml,sha256=D8HdBRGUYD-c-c38oSgzcP3fkNhBN-tVdqLnS_B-7zc,265
+fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_code.yaml,sha256=4DoMFlGabtwZXZMGWsWtkP2rlGOx_1eEPp_AyqyVln0,263
+fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml,sha256=ofFFVYKHKtylxd90REMLhhP57Yqwe2AEbGuZ0mBCVz8,305
 fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml,sha256=Nxk72MurqSzEyPJzGoKFbk5T2TGWBwYpH2V9Jzqt648,229
 fusion_bench_config/modelpool/CausalLMPool/deepseek-v2-lite.yaml,sha256=8gr8ZtgegSHV0GHtJBiEgdYbRe8UHhO4_y8dayxZChk,506
+fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml,sha256=mm7A3NilcANJBuCZMt3MMLKFm7CjBhMYWAa9TXjM_PQ,326
 fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml,sha256=oDsZkuAoh1mWUC7jZNzw8794zgX2bV5Z0esXpvbTs-c,643
 fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml,sha256=FuUsBrvk3_bQiciMRlNsO5vp6AKHQM_-g-8bmU8251w,641
 fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml,sha256=H3UwSk4ChsGSrH49LuttxldFURW-4RVUtnIa0ClHKXo,802
@@ -927,8 +934,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=3q-KMuFaM
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
-fusion_bench-0.2.23.dist-info/METADATA,sha256=HQZ3DxHk-Jtcj2AZT49tx5m7VdVkDEglivhkfQv258Q,22384
-fusion_bench-0.2.23.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-fusion_bench-0.2.23.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.23.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.23.dist-info/RECORD,,
+fusion_bench-0.2.24.dist-info/METADATA,sha256=DllRpMnvVgyeqjN_YlNeo7IlqukzOjuYO_cWopOo1tA,22621
+fusion_bench-0.2.24.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+fusion_bench-0.2.24.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.24.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.24.dist-info/RECORD,,

fusion_bench_config/method/ensemble/simple_ensemble.yaml CHANGED Viewed

	@@ -1 +1,2 @@
1 1	_target_: fusion_bench.method.SimpleEnsembleAlgorithm
2	+ device_map: null # Set to null for single device, or specify mapping

fusion_bench_config/method/linear/{simple_average_for_llama.yaml → simple_average_for_causallm.yaml} RENAMED Viewed

@@ -1,4 +1,4 @@
-_target_: fusion_bench.method.SimpleAverageForLlama
+_target_: fusion_bench.method.SimpleAverageForCausalLM
 # set `merge_backbone` to true if you have a base model and only want to merge the backbone of the experts
 # if `merge_backbone` is False, this is equivalent to `SimpleAverageAlgorithm`
 merge_backbone: false

fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+_target_: fusion_bench.method.TaskArithmeticForCausalLM
+scaling_factor: 0.3
+merge_backbone: false
+model_save_path: ${path.log_dir}/checkpoint

fusion_bench_config/method/linear/ties_merging_for_causallm.yaml ADDED Viewed

@@ -0,0 +1,13 @@
+_target_: fusion_bench.method.TiesMergingForCausalLM
+# Scaling factor $\lambda$
+scaling_factor: 0.3
+# Threshold for resetting values in the task vector
+threshold: 20
+# List of keys to remove from the state dict, default is empty
+remove_keys: []
+# Function to merge the models, default is sum. Options are 'sum', 'mean', and 'max'
+merge_func: sum
+# Whether to merge only the backbone layers
+merge_backbone: false
+# Path to save the merged model
+model_save_path: ${path.log_dir}/checkpoint

fusion_bench_config/method/wudi/wudi.yaml ADDED Viewed

@@ -0,0 +1,4 @@
+_target_: fusion_bench.method.WUDIMerging
+iter_num: 400
+exclude_keys: null

fusion_bench_config/modelpool/CausalLMPool/{Qwen2.5-1.5B_math_and_coder.yaml → Qwen2.5-1.5B_math_and_code.yaml} RENAMED Viewed

@@ -1,7 +1,6 @@
 _target_: fusion_bench.modelpool.CausalLMPool
 _recursive_: false
-enable_lazy_loading: false
+enable_lazy_loading: true
 models:
   _pretrained_: Qwen/Qwen2.5-1.5B
   math: Qwen/Qwen2.5-Math-1.5B

fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+_recursive_: false
+enable_lazy_loading: true
+models:
+  _pretrained_: Qwen/Qwen2.5-1.5B
+  math: Qwen/Qwen2.5-Math-1.5B
+  code: Qwen/Qwen2.5-Coder-1.5B
+  instruction: Qwen/Qwen2.5-1.5B-Instruct
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: Qwen/Qwen2.5-1.5B

fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+_recursive_: false
+enable_lazy_loading: true
+models:
+  _pretrained_: meta-llama/Llama-2-7b-hf
+  chat: meta-llama/Llama-2-7b-chat-hf
+  math: WizardLMTeam/WizardMath-7B-V1.0
+  code: codellama/CodeLlama-7b-hf
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-2-7b-hf

fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-_target_: fusion_bench.method.TaskArithmeticForLlama
-scaling_factor: 0.3
-merge_backbone: true
-model_save_path: null

{fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/WHEEL RENAMED Viewed

File without changes

{fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/top_level.txt RENAMED Viewed

File without changes

fusion-bench 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

fusion-bench 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl