PyPI - fusion-bench - Versions diffs - 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl - Mend

fusion-bench 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

fusion_bench/modelpool/causal_lm/causal_lm.py CHANGED Viewed

@@ -1,15 +1,19 @@
+"""
+Online documentation for this module: https://tanganke.github.io/fusion_bench/modelpool/causal_lm
+"""
 import logging
 import os
 from copy import deepcopy
-from typing import Any, Optional, TypeAlias, Union, cast  # noqa: F401
+from typing import Any, Dict, Optional, TypeAlias, Union, cast  # noqa: F401
 import peft
 from omegaconf import DictConfig, flag_override
 from torch import nn
 from torch.nn.modules import Module
 from transformers import (
-    LlamaForCausalLM,
-    MistralForCausalLM,
+    AutoModelForCausalLM,
+    AutoTokenizer,
     PreTrainedModel,
     PreTrainedTokenizer,
 )
@@ -21,8 +25,6 @@ from fusion_bench.utils.dtype import parse_dtype
 log = logging.getLogger(__name__)
-CausalLM: TypeAlias = Union[LlamaForCausalLM, MistralForCausalLM, Any]
 class CausalLMPool(BaseModelPool):
     _config_mapping = BaseModelPool._config_mapping | {
@@ -56,17 +58,78 @@ class CausalLMPool(BaseModelPool):
         model_name_or_config: str | DictConfig,
         *args,
         **kwargs,
-    ) -> LlamaForCausalLM | MistralForCausalLM | nn.Module:
+    ) -> PreTrainedModel:
+        """
+        Example of YAML config:
+        ```yaml
+        models:
+          _pretrained_: path_to_pretrained_model # if a plain string, it will be passed to AutoModelForCausalLM.from_pretrained
+          model_a: path_to_model_a
+          model_b: path_to_model_b
+        ```
+        or equivalently,
+        ```yaml
+        models:
+          _pretrained_:
+            _target_: transformers.AutoModelForCausalLM # any callable that returns a model
+            pretrained_model_name_or_path: path_to_pretrained_model
+          model_a:
+            _target_: transformers.AutoModelForCausalLM
+            pretrained_model_name_or_path: path_to_model_a
+          model_b:
+            _target_: transformers.AutoModelForCausalLM
+            pretrained_model_name_or_path: path_to_model_b
+        ```
+        """
         model_kwargs = deepcopy(self._model_kwargs)
         model_kwargs.update(kwargs)
         if isinstance(model_name_or_config, str):
             log.info(f"Loading model: {model_name_or_config}", stacklevel=2)
-        return super().load_model(model_name_or_config, *args, **model_kwargs)
+            if model_name_or_config in self._models.keys():
+                model_config = self._models[model_name_or_config]
+                if isinstance(model_config, str):
+                    # model_config is a string
+                    model = AutoModelForCausalLM.from_pretrained(
+                        model_config,
+                        *args,
+                        **model_kwargs,
+                    )
+                    return model
+        elif isinstance(model_name_or_config, (DictConfig, Dict)):
+            model_config = model_name_or_config
+        model = instantiate(model_config, *args, **model_kwargs)
+        return model
     def load_tokenizer(self, *args, **kwargs) -> PreTrainedTokenizer:
+        """
+        Example of YAML config:
+        ```yaml
+        tokenizer: google/gemma-2-2b-it # if a plain string, it will be passed to AutoTokenizer.from_pretrained
+        ```
+        or equivalently,
+        ```yaml
+        tokenizer:
+          _target_: transformers.AutoTokenizer # any callable that returns a tokenizer
+          pretrained_model_name_or_path: google/gemma-2-2b-it
+        ```
+        Returns:
+            PreTrainedTokenizer: The tokenizer.
+        """
         assert self._tokenizer is not None, "Tokenizer is not defined in the config"
         log.info("Loading tokenizer.", stacklevel=2)
-        tokenizer = instantiate(self._tokenizer, *args, **kwargs)
+        if isinstance(self._tokenizer, str):
+            tokenizer = AutoTokenizer.from_pretrained(self._tokenizer, *args, **kwargs)
+        else:
+            tokenizer = instantiate(self._tokenizer, *args, **kwargs)
         return tokenizer
     @override
@@ -113,7 +176,7 @@ class CausalLMBackbonePool(CausalLMPool):
     def load_model(
         self, model_name_or_config: str | DictConfig, *args, **kwargs
     ) -> Module:
-        model: Union[MistralForCausalLM, LlamaForCausalLM, Any] = super().load_model(
+        model: AutoModelForCausalLM = super().load_model(
             model_name_or_config, *args, **kwargs
         )
         return model.model.layers
@@ -126,7 +189,7 @@ def load_peft_causal_lm(
     is_trainable: bool = True,
     merge_and_unload: bool = False,
 ):
-    base_model = LlamaForCausalLM.from_pretrained(
+    base_model = AutoModelForCausalLM.from_pretrained(
         base_model_path, torch_dtype=torch_dtype
     )
     model = peft.PeftModel.from_pretrained(

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.15.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.14
+Version: 0.2.15
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -63,7 +63,7 @@ Dynamic: license-file
 </div>
-> [!TIP]
+> [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
 ## Overview
@@ -157,6 +157,9 @@ pip install -e ".[lm-eval-harness]"
 This will install the latest version of fusion-bench and the dependencies required for LM-Eval Harness.
 Documentation for using LM-Eval Harness within FusionBench framework can be found at [this online documentation](https://tanganke.github.io/fusion_bench/taskpool/lm_eval_harness) or in the [`docs/taskpool/lm_eval_harness.md`](docs/taskpool/lm_eval_harness.md) markdown file.
+> [!TIP]
+> Documentation for merging large language models using FusionBench can be found at [this online documentation](https://tanganke.github.io/fusion_bench/modelpool/causal_lm) or in the [`docs/modelpool/causal_lm.md`](docs/modelpool/causal_lm.md) markdown file.
 ## Introduction to Deep Model Fusion
 Deep model fusion is a technique that merges, ensemble, or fuse multiple deep neural networks to obtain a unified model.

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.15.dist-info}/RECORD RENAMED Viewed

@@ -219,7 +219,7 @@ fusion_bench/modelpool/huggingface_automodel.py,sha256=OJ6EyYyjNv1_Bhjn-zli-e__B
 fusion_bench/modelpool/huggingface_gpt2_classification.py,sha256=j8nicVwtoLXY4RPE2dcepeEB3agBKkkH-xA3yMj1czw,2014
 fusion_bench/modelpool/nyuv2_modelpool.py,sha256=btuXmYxwfjI6MnGakhoOf53Iyb9fxYH20CavGTrTcnA,1375
 fusion_bench/modelpool/causal_lm/__init__.py,sha256=F432-aDIgAbUITj4GNZS9dgUKKhaDMCbTeHB-9MecaQ,99
-fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=k0eOOcFbswVgBYhM9CEXvdCRU9zVC8Gw78QaiMWzeWo,4487
+fusion_bench/modelpool/causal_lm/causal_lm.py,sha256=fO8lF8YWwoe43sVVOqHW9Ike7x-924-I6QQgZqx9EgA,6505
 fusion_bench/modelpool/clip_vision/__init__.py,sha256=3b9gN2bWUsoA1EmpitnIMnIlX7nklxbkn4WJ0QJtS2c,43
 fusion_bench/modelpool/clip_vision/modelpool.py,sha256=JH1wLdWefvE242SYpXTnoSLkKX-YcadnidWd2bo8tWQ,5486
 fusion_bench/modelpool/openclip_vision/__init__.py,sha256=QDmAitKqUwRygN9QncdS_kGWZdfTKL4uUifC8xh9c10,47
@@ -393,7 +393,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
 fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
 fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
 fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
-fusion_bench-0.2.14.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
+fusion_bench-0.2.15.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
 fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
 fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=7IxLQoLRz-sRWyV8Vqc5kQcmYE_9YQz2_77pmvAkum8,1207
 fusion_bench_config/fabric_model_fusion.yaml,sha256=5iPgaM8UOhuvBW2Hap_csst-eqlYRwb_lru8ngjrZ_g,948
@@ -729,6 +729,14 @@ fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml,sha256=MpgshGtmM
 fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml,sha256=Kbpam1Hds5URMP35dXGdVibH-vTmYPh3xHMkhj6Mgtg,648
 fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml,sha256=FynhZ1PRvyzsyzrHIuMpGgQGRMlu_xI7earm-CeIVeY,824
 fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml,sha256=zQWfp7mYm6jQ8g41Eeh2d9vAbocZJ5btPX1ft9QpEZU,546
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B-Instruct.yaml,sha256=NDq_prH-b9Vw7lRjsyJIcbeF4MXVVdszxK1FPJxIJYs,453
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml,sha256=Mg_z2vnw7IkNPoMvhl_Ja6gT9tX942sqaNfjXQRzBvg,390
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B-Instruct.yaml,sha256=SfPEji6mWx9Dw48rE0B8MDrYv2NVLC-S98DK5xaU6So,453
+fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml,sha256=2vpOp9t8SUP2rkBw21mqwRYApkqXQiaYXcZm2oxLox4,390
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml,sha256=8up_cqEhabGeK6l6tMha9DJzsPoEIFN8bS_Kwv7LmCc,389
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml,sha256=SODG0kcnAP6yC0_J_SpSVMRV-v5qGV22gcWdiBaZo1I,368
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml,sha256=zwInWJS8yrhch4vOL1ypRKNWWpJKlhQsyY0Ln14CC-M,389
+fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml,sha256=ufmu4b3lyxn2XLDMVYxP-bKwYaGTjB5-JoYXLG8v8tY,368
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md,sha256=DC0HF-isCHshipHTC0Rof6GvjTUa0i2DVQZKrklQQlU,2416
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml,sha256=jbJqqciORJQknpSzh2zKiFm6VKDOsmaSk9XfPCVmHGg,1220
 fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml,sha256=q2_E2R1wIOdxd-AF-wjXkPO64gJgD27YXsZ8FFLWUIo,1607
@@ -790,8 +798,8 @@ fusion_bench_config/taskpool/LMEvalHarnessTaskPool/lm_eval.yaml,sha256=45kSz44pc
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml,sha256=GjpiiRownrBCpl-TNwWRW2PYePbF-Cl99jlLNPrK5T4,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml,sha256=WwiYMQKehtJixDPnu5o3vcWe4yJksXTWRqOzm3uVWXQ,1017
 fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml,sha256=xGRt0J9joXTzWUew6DvoYprAWlPXhaVFw5AX4im5VQw,1017
-fusion_bench-0.2.14.dist-info/METADATA,sha256=X13MPJ_FA0D5Gc5T-CvbcYOK03QtTiyIHnDNbI7_aOo,20904
-fusion_bench-0.2.14.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-fusion_bench-0.2.14.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.14.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.14.dist-info/RECORD,,
+fusion_bench-0.2.15.dist-info/METADATA,sha256=abOyRl-ejl7CvLRCaRP20vn7rdb5OF92GxS_S9qTK3Q,21171
+fusion_bench-0.2.15.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+fusion_bench-0.2.15.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.15.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.15.dist-info/RECORD,,

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B-Instruct.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.1-8B-Instruct
+  instruction: MergeBench/Llama-3.1-8B-Instruct_instruction
+  math: MergeBench/Llama-3.1-8B-Instruct_math
+  coding: MergeBench/Llama-3.1-8B-Instruct_coding
+  multilingual: MergeBench/Llama-3.1-8B-Instruct_multilingual
+  safety: MergeBench/Llama-3.1-8B-Instruct_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.1-8B-Instruct

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.1-8B.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.1-8B
+  instruction: MergeBench/Llama-3.1-8B_instruction
+  math: MergeBench/Llama-3.1-8B_math
+  coding: MergeBench/Llama-3.1-8B_coding
+  multilingual: MergeBench/Llama-3.1-8B_multilingual
+  safety: MergeBench/Llama-3.1-8B_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.1-8B

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B-Instruct.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.2-3B-Instruct
+  instruction: MergeBench/Llama-3.2-3B-Instruct_instruction
+  math: MergeBench/Llama-3.2-3B-Instruct_math
+  coding: MergeBench/Llama-3.2-3B-Instruct_coding
+  multilingual: MergeBench/Llama-3.2-3B-Instruct_multilingual
+  safety: MergeBench/Llama-3.2-3B-Instruct_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.2-3B-Instruct

fusion_bench_config/modelpool/CausalLMPool/mergebench/Llama-3.2-3B.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: meta-llama/Llama-3.2-3B
+  instruction: MergeBench/Llama-3.2-3B_instruction
+  math: MergeBench/Llama-3.2-3B_math
+  coding: MergeBench/Llama-3.2-3B_coding
+  multilingual: MergeBench/Llama-3.2-3B_multilingual
+  safety: MergeBench/Llama-3.2-3B_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: meta-llama/Llama-3.2-3B

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b-it.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-2b-it
+  instruction: MergeBench/gemma-2-2b-it_instruction
+  math: MergeBench/gemma-2-2b-it_math
+  coding: MergeBench/gemma-2-2b-it_coding
+  multilingual: MergeBench/gemma-2-2b-it_multilingual
+  safety: MergeBench/gemma-2-2b-it_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-2b-it

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-2b.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-2b
+  instruction: MergeBench/gemma-2-2b_instruction
+  math: MergeBench/gemma-2-2b_math
+  coding: MergeBench/gemma-2-2b_coding
+  multilingual: MergeBench/gemma-2-2b_multilingual
+  safety: MergeBench/gemma-2-2b_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-2b

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b-it.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-9b-it
+  instruction: MergeBench/gemma-2-9b-it_instruction
+  math: MergeBench/gemma-2-9b-it_math
+  coding: MergeBench/gemma-2-9b-it_coding
+  multilingual: MergeBench/gemma-2-9b-it_multilingual
+  safety: MergeBench/gemma-2-9b-it_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-9b-it

fusion_bench_config/modelpool/CausalLMPool/mergebench/gemma-2-9b.yaml ADDED Viewed

@@ -0,0 +1,11 @@
+_target_: fusion_bench.modelpool.CausalLMPool
+models:
+  _pretrained_: google/gemma-2-9b
+  instruction: MergeBench/gemma-2-9b_instruction
+  math: MergeBench/gemma-2-9b_math
+  coding: MergeBench/gemma-2-9b_coding
+  multilingual: MergeBench/gemma-2-9b_multilingual
+  safety: MergeBench/gemma-2-9b_safety
+model_kwargs:
+  torch_dtype: bfloat16
+tokenizer: google/gemma-2-9b

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.15.dist-info}/WHEEL RENAMED Viewed

File without changes

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.15.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.15.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{fusion_bench-0.2.14.dist-info → fusion_bench-0.2.15.dist-info}/top_level.txt RENAMED Viewed

File without changes

fusion-bench 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl

fusion-bench 0.2.14__py3-none-any.whl → 0.2.15__py3-none-any.whl