fusion-bench 0.2.18__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fusion_bench/__init__.py CHANGED
@@ -1,3 +1,9 @@
+# ███████╗██╗ ██╗███████╗██╗ ██████╗ ███╗ ██╗ ██████╗ ███████╗███╗ ██╗ ██████╗██╗ ██╗
+# ██╔════╝██║ ██║██╔════╝██║██╔═══██╗████╗ ██║ ██╔══██╗██╔════╝████╗ ██║██╔════╝██║ ██║
+# █████╗ ██║ ██║███████╗██║██║ ██║██╔██╗ ██║█████╗██████╔╝█████╗ ██╔██╗ ██║██║ ███████║
+# ██╔══╝ ██║ ██║╚════██║██║██║ ██║██║╚██╗██║╚════╝██╔══██╗██╔══╝ ██║╚██╗██║██║ ██╔══██║
+# ██║ ╚██████╔╝███████║██║╚██████╔╝██║ ╚████║ ██████╔╝███████╗██║ ╚████║╚██████╗██║ ██║
+# ╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝╚═╝ ╚═╝
 # flake8: noqa: F401
 from . import (
     constants,
@@ -0,0 +1,12 @@
+FUSION_BENCH_BANNER = (
+    ""
+    + "███████╗██╗ ██╗███████╗██╗ ██████╗ ███╗ ██╗ ██████╗ ███████╗███╗ ██╗ ██████╗██╗ ██╗\n"
+    + "██╔════╝██║ ██║██╔════╝██║██╔═══██╗████╗ ██║ ██╔══██╗██╔════╝████╗ ██║██╔════╝██║ ██║\n"
+    + "█████╗ ██║ ██║███████╗██║██║ ██║██╔██╗ ██║█████╗██████╔╝█████╗ ██╔██╗ ██║██║ ███████║\n"
+    + "██╔══╝ ██║ ██║╚════██║██║██║ ██║██║╚██╗██║╚════╝██╔══██╗██╔══╝ ██║╚██╗██║██║ ██╔══██║\n"
+    + "██║ ╚██████╔╝███████║██║╚██████╔╝██║ ╚████║ ██████╔╝███████╗██║ ╚████║╚██████╗██║ ██║\n"
+    + "╚═╝ ╚═════╝ ╚══════╝╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝ ╚══════╝╚═╝ ╚═══╝ ╚═════╝╚═╝ ╚═╝\n"
+)
+
+if __name__ == "__main__":
+    print(FUSION_BENCH_BANNER)
@@ -1,4 +1,5 @@
-from typing import Optional
+from copy import deepcopy
+from typing import TYPE_CHECKING, Optional
 
 from typing_extensions import override
 
@@ -6,6 +7,11 @@ from fusion_bench import timeit_context
 from fusion_bench.method.base_algorithm import BaseAlgorithm
 from fusion_bench.method.simple_average import SimpleAverageAlgorithm
 from fusion_bench.modelpool import CausalLMBackbonePool, CausalLMPool
+from fusion_bench.utils.pylogger import getRankZeroLogger
+from omegaconf import flag_override
+from fusion_bench.utils import instantiate
+
+log = getRankZeroLogger(__name__)
 
 
 class SimpleAverageForLlama(BaseAlgorithm):
@@ -40,12 +46,20 @@ class SimpleAverageForLlama(BaseAlgorithm):
 
         if self.merge_backbone:
             assert modelpool.has_pretrained
-            backbone_modelpool = CausalLMBackbonePool(**modelpool.config)
+            log.info(
+                "Merging backbone of the model pool, use CausalLMBackbonePool instead of CausalLMPool."
+            )
+            modelpool_config = deepcopy(modelpool.config)
+            with flag_override(modelpool_config, "allow_objects", True):
+                modelpool_config._target_ = (
+                    "fusion_bench.modelpool.causal_lm.CausalLMBackbonePool"
+                )
+            backbone_modelpool = instantiate(modelpool_config)
             model = modelpool.load_model("_pretrained_")
             backbone_model = SimpleAverageAlgorithm().run(backbone_modelpool)
             model.model.layers = backbone_model
         else:
-            model = SimpleAverageAlgorithm().run()
+            model = SimpleAverageAlgorithm().run(modelpool=modelpool)
 
         if self.model_save_path is not None:
             with timeit_context(f"Saving the model to {self.model_save_path}"):
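
The hunk above replaces direct construction of `CausalLMBackbonePool` with a config-driven `instantiate` call: the pool's config is deep-copied, its `_target_` is repointed at the backbone pool class under OmegaConf's `flag_override` context manager, and the copy is instantiated. It also fixes the `else` branch, which previously called `SimpleAverageAlgorithm().run()` without the required `modelpool` argument. A minimal sketch of the retargeting pattern, using toy `_target_` values rather than fusion_bench classes:

```python
from copy import deepcopy

from omegaconf import OmegaConf, flag_override

# Toy Hydra-style config; "collections.OrderedDict" stands in for a real class path.
cfg = OmegaConf.create({"_target_": "collections.OrderedDict", "verbose": True})

retargeted = deepcopy(cfg)  # copy first so the caller's config is left untouched
with flag_override(retargeted, "allow_objects", True):
    # temporarily relax the config's flags, then point it at a different class
    retargeted._target_ = "collections.Counter"

print(retargeted["_target_"])  # collections.Counter
```
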
@@ -8,6 +8,7 @@ from torch import nn
 from fusion_bench.method.base_algorithm import BaseAlgorithm
 from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
 from fusion_bench.modelpool import BaseModelPool
+from fusion_bench.utils import LazyStateDict
 from fusion_bench.utils.state_dict_arithmetic import (
     state_dict_add,
     state_dict_avg,
@@ -104,6 +105,15 @@ class SimpleAverageAlgorithm(
         # Divide the accumulated state dictionary by the number of models to get the average
         sd = state_dict_div(sd, len(modelpool.model_names))
 
+        if isinstance(forward_model, LazyStateDict):
+            # if the model is a LazyStateDict, convert it to an empty module
+            forward_model = forward_model.meta_module.to_empty(
+                device=(
+                    "cpu"
+                    if forward_model._torch_dtype is None
+                    else forward_model._torch_dtype
+                )
+            )
         forward_model.load_state_dict(sd)
         # print profile report and log the merged models
         self.print_profile_summary()
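
The new branch above relies on PyTorch's meta-device workflow: a module built on the `meta` device carries parameter shapes but no storage, and `Module.to_empty()` allocates uninitialized storage so the averaged weights can be loaded into it. A self-contained sketch of that workflow in plain `torch`, with no fusion_bench types:

```python
import torch
from torch import nn

# Build a module on the meta device: parameter shapes exist, but no storage is allocated.
with torch.device("meta"):
    meta_module = nn.Linear(4, 4)

# Allocate uninitialized storage on a real device, then load actual weights into it.
module = meta_module.to_empty(device="cpu")
module.load_state_dict({"weight": torch.zeros(4, 4), "bias": torch.zeros(4)})
print(module.weight.device)  # cpu
```
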
@@ -22,6 +22,8 @@ from typing_extensions import override
 from fusion_bench.modelpool import BaseModelPool
 from fusion_bench.utils import instantiate
 from fusion_bench.utils.dtype import parse_dtype
+from fusion_bench.utils.lazy_state_dict import LazyStateDict
+from fusion_bench.utils.packages import import_object
 
 log = logging.getLogger(__name__)
 
@@ -30,6 +32,7 @@ class CausalLMPool(BaseModelPool):
     _config_mapping = BaseModelPool._config_mapping | {
         "_tokenizer": "tokenizer",
         "_model_kwargs": "model_kwargs",
+        "load_lazy": "load_lazy",
     }
 
     def __init__(
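
The `_config_mapping` change above extends the parent pool's mapping with the dict union operator (`|`, Python 3.9+); illustrated with plain dicts using the keys from the hunk:

```python
# Dict union (PEP 584): right-hand entries are added and win on key collisions.
base = {"_tokenizer": "tokenizer", "_model_kwargs": "model_kwargs"}
extended = base | {"load_lazy": "load_lazy"}
print(extended)
# {'_tokenizer': 'tokenizer', '_model_kwargs': 'model_kwargs', 'load_lazy': 'load_lazy'}
```
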
@@ -38,6 +41,7 @@ class CausalLMPool(BaseModelPool):
         *,
         tokenizer: Optional[DictConfig],
         model_kwargs: Optional[DictConfig] = None,
+        load_lazy: bool = False,
         **kwargs,
     ):
         super().__init__(models, **kwargs)
@@ -51,6 +55,7 @@ class CausalLMPool(BaseModelPool):
             self._model_kwargs.torch_dtype = parse_dtype(
                 self._model_kwargs.torch_dtype
            )
+        self.load_lazy = load_lazy
 
     @override
     def load_model(
@@ -88,21 +93,41 @@ class CausalLMPool(BaseModelPool):
         model_kwargs.update(kwargs)
 
         if isinstance(model_name_or_config, str):
+            # If model_name_or_config is a string, it is the name or the path of the model
             log.info(f"Loading model: {model_name_or_config}", stacklevel=2)
             if model_name_or_config in self._models.keys():
                 model_config = self._models[model_name_or_config]
                 if isinstance(model_config, str):
                     # model_config is a string
-                    model = AutoModelForCausalLM.from_pretrained(
-                        model_config,
-                        *args,
-                        **model_kwargs,
-                    )
+                    if not self.load_lazy:
+                        model = AutoModelForCausalLM.from_pretrained(
+                            model_config,
+                            *args,
+                            **model_kwargs,
+                        )
+                    else:
+                        # model_config is a string, but we want to use LazyStateDict
+                        model = LazyStateDict(
+                            checkpoint=model_config,
+                            meta_module_class=AutoModelForCausalLM,
+                            *args,
+                            **model_kwargs,
+                        )
             return model
         elif isinstance(model_name_or_config, (DictConfig, Dict)):
             model_config = model_name_or_config
 
-            model = instantiate(model_config, *args, **model_kwargs)
+            if not self.load_lazy:
+                model = instantiate(model_config, *args, **model_kwargs)
+            else:
+                meta_module_class = model_config.pop("_target_")
+                checkpoint = model_config.pop("pretrained_model_name_or_path")
+                model = LazyStateDict(
+                    checkpoint=checkpoint,
+                    meta_module_class=meta_module_class,
+                    *args,
+                    **model_kwargs,
+                )
             return model
 
     def load_tokenizer(self, *args, **kwargs) -> PreTrainedTokenizer:
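
With both branches above, `load_model` now returns either an eagerly materialized model or a `LazyStateDict` wrapper, depending on the pool-level `load_lazy` flag. A hypothetical construction showing the flag in use; the model paths and the tokenizer config are illustrative stand-ins, not values taken from this diff:

```python
from omegaconf import DictConfig

from fusion_bench.modelpool import CausalLMPool

pool = CausalLMPool(
    # hypothetical checkpoints; any Hugging Face repo id or local path would do
    models=DictConfig({"base": "path/to/model-a", "finetuned": "path/to/model-b"}),
    tokenizer=DictConfig(
        {
            "_target_": "transformers.AutoTokenizer.from_pretrained",
            "pretrained_model_name_or_path": "path/to/model-a",
        }
    ),
    load_lazy=True,  # load_model() returns LazyStateDict objects instead of loading weights
)

lazy_model = pool.load_model("base")  # no weight tensors are read eagerly here
```
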
@@ -179,6 +204,12 @@ class CausalLMBackbonePool(CausalLMPool):
     def load_model(
         self, model_name_or_config: str | DictConfig, *args, **kwargs
     ) -> Module:
+        if self.load_lazy:
+            log.warning(
+                "CausalLMBackbonePool does not support lazy loading. "
+                "Falling back to normal loading."
+            )
+            self.load_lazy = False
         model: AutoModelForCausalLM = super().load_model(
             model_name_or_config, *args, **kwargs
         )