PyPI - fusion-bench - Versions diffs - 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl - Mend

fusion-bench 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (50) hide show

fusion_bench/method/simple_average.py CHANGED Viewed

@@ -8,6 +8,7 @@ from torch import nn
 from fusion_bench.method.base_algorithm import BaseAlgorithm
 from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
 from fusion_bench.modelpool import BaseModelPool
+from fusion_bench.utils import LazyStateDict
 from fusion_bench.utils.state_dict_arithmetic import (
     state_dict_add,
     state_dict_avg,
@@ -62,6 +63,18 @@ class SimpleAverageAlgorithm(
     BaseAlgorithm,
     SimpleProfilerMixin,
 ):
+    _config_mapping = BaseAlgorithm._config_mapping | {
+        "show_pbar": "show_pbar",
+    }
+    def __init__(self, show_pbar: bool = False):
+        """
+        Args:
+            show_pbar (bool): If True, shows a progress bar during model loading and merging. Default is False.
+        """
+        super().__init__()
+        self.show_pbar = show_pbar
     @torch.no_grad()
     def run(self, modelpool: Union[BaseModelPool, Dict[str, nn.Module]]):
         """
@@ -99,11 +112,24 @@ class SimpleAverageAlgorithm(
                     forward_model = model
                 else:
                     # Add the current model's state dictionary to the accumulated state dictionary
-                    sd = state_dict_add(sd, model.state_dict(keep_vars=True))
+                    sd = state_dict_add(
+                        sd, model.state_dict(keep_vars=True), show_pbar=self.show_pbar
+                    )
         with self.profile("merge weights"):
             # Divide the accumulated state dictionary by the number of models to get the average
-            sd = state_dict_div(sd, len(modelpool.model_names))
+            sd = state_dict_div(
+                sd, len(modelpool.model_names), show_pbar=self.show_pbar
+            )
+        if isinstance(forward_model, LazyStateDict):
+            # if the model is a LazyStateDict, convert it to an empty module
+            forward_model = forward_model.meta_module.to_empty(
+                device=(
+                    "cpu"
+                    if forward_model._torch_dtype is None
+                    else forward_model._torch_dtype
+                )
+            )
         forward_model.load_state_dict(sd)
         # print profile report and log the merged models
         self.print_profile_summary()

fusion_bench/modelpool/causal_lm/causal_lm.py CHANGED Viewed

@@ -22,6 +22,8 @@ from typing_extensions import override
 from fusion_bench.modelpool import BaseModelPool
 from fusion_bench.utils import instantiate
 from fusion_bench.utils.dtype import parse_dtype
+from fusion_bench.utils.lazy_state_dict import LazyStateDict
+from fusion_bench.utils.packages import import_object
 log = logging.getLogger(__name__)
@@ -30,6 +32,7 @@ class CausalLMPool(BaseModelPool):
     _config_mapping = BaseModelPool._config_mapping | {
         "_tokenizer": "tokenizer",
         "_model_kwargs": "model_kwargs",
+        "load_lazy": "load_lazy",
     }
     def __init__(
@@ -38,6 +41,7 @@ class CausalLMPool(BaseModelPool):
         *,
         tokenizer: Optional[DictConfig],
         model_kwargs: Optional[DictConfig] = None,
+        load_lazy: bool = False,
         **kwargs,
     ):
         super().__init__(models, **kwargs)
@@ -51,6 +55,7 @@ class CausalLMPool(BaseModelPool):
                 self._model_kwargs.torch_dtype = parse_dtype(
                     self._model_kwargs.torch_dtype
                 )
+        self.load_lazy = load_lazy
     @override
     def load_model(
@@ -88,21 +93,41 @@ class CausalLMPool(BaseModelPool):
         model_kwargs.update(kwargs)
         if isinstance(model_name_or_config, str):
+            # If model_name_or_config is a string, it is the name or the path of the model
             log.info(f"Loading model: {model_name_or_config}", stacklevel=2)
             if model_name_or_config in self._models.keys():
                 model_config = self._models[model_name_or_config]
                 if isinstance(model_config, str):
                     # model_config is a string
-                    model = AutoModelForCausalLM.from_pretrained(
-                        model_config,
-                        *args,
-                        **model_kwargs,
-                    )
+                    if not self.load_lazy:
+                        model = AutoModelForCausalLM.from_pretrained(
+                            model_config,
+                            *args,
+                            **model_kwargs,
+                        )
+                    else:
+                        # model_config is a string, but we want to use LazyStateDict
+                        model = LazyStateDict(
+                            checkpoint=model_config,
+                            meta_module_class=AutoModelForCausalLM,
+                            *args,
+                            **model_kwargs,
+                        )
                     return model
         elif isinstance(model_name_or_config, (DictConfig, Dict)):
             model_config = model_name_or_config
-        model = instantiate(model_config, *args, **model_kwargs)
+        if not self.load_lazy:
+            model = instantiate(model_config, *args, **model_kwargs)
+        else:
+            meta_module_class = model_config.pop("_target_")
+            checkpoint = model_config.pop("pretrained_model_name_or_path")
+            model = LazyStateDict(
+                checkpoint=checkpoint,
+                meta_module_class=meta_module_class,
+                *args,
+                **model_kwargs,
+            )
         return model
     def load_tokenizer(self, *args, **kwargs) -> PreTrainedTokenizer:
@@ -179,6 +204,12 @@ class CausalLMBackbonePool(CausalLMPool):
     def load_model(
         self, model_name_or_config: str | DictConfig, *args, **kwargs
     ) -> Module:
+        if self.load_lazy:
+            log.warning(
+                "CausalLMBackbonePool does not support lazy loading. "
+                "Falling back to normal loading."
+            )
+            self.load_lazy = False
         model: AutoModelForCausalLM = super().load_model(
             model_name_or_config, *args, **kwargs
         )

fusion_bench/modelpool/clip_vision/modelpool.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import logging
 from copy import deepcopy
-from typing import Optional, Union
+from typing import Literal, Optional, Union
 from datasets import load_dataset
 from lightning.fabric.utilities import rank_zero_only
@@ -11,6 +11,9 @@ from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel
 from typing_extensions import override
 from fusion_bench.utils import instantiate, timeit_context
+from fusion_bench.utils.modelscope import (
+    resolve_repo_path,
+)
 from ..base_pool import BaseModelPool
@@ -25,25 +28,32 @@ class CLIPVisionModelPool(BaseModelPool):
     the specifics of the CLIP Vision models provided by the Hugging Face Transformers library.
     """
-    _config_mapping = BaseModelPool._config_mapping | {"_processor": "processor"}
+    _config_mapping = BaseModelPool._config_mapping | {
+        "_processor": "processor",
+        "_platform": "hf",
+    }
     def __init__(
         self,
         models: DictConfig,
         *,
         processor: Optional[DictConfig] = None,
+        platform: Literal["hf", "huggingface", "modelscope"] = "hf",
         **kwargs,
     ):
         super().__init__(models, **kwargs)
         self._processor = processor
+        self._platform = platform
     def load_processor(self, *args, **kwargs) -> CLIPProcessor:
         assert self._processor is not None, "Processor is not defined in the config"
         if isinstance(self._processor, str):
             if rank_zero_only.rank == 0:
                 log.info(f"Loading `transformers.CLIPProcessor`: {self._processor}")
-            processor = CLIPProcessor.from_pretrained(self._processor)
+            repo_path = resolve_repo_path(
+                repo_id=self._processor, repo_type="model", platform=self._platform
+            )
+            processor = CLIPProcessor.from_pretrained(repo_path, *args, **kwargs)
         else:
             processor = instantiate(self._processor, *args, **kwargs)
         return processor
@@ -54,7 +64,10 @@ class CLIPVisionModelPool(BaseModelPool):
         if isinstance(model_config, str):
             if rank_zero_only.rank == 0:
                 log.info(f"Loading `transformers.CLIPModel`: {model_config}")
-            clip_model = CLIPModel.from_pretrained(model_config, *args, **kwargs)
+            repo_path = resolve_repo_path(
+                repo_id=model_config, repo_type="model", platform=self._platform
+            )
+            clip_model = CLIPModel.from_pretrained(repo_path, *args, **kwargs)
             return clip_model
         else:
             assert isinstance(
@@ -107,14 +120,17 @@ class CLIPVisionModelPool(BaseModelPool):
             if isinstance(model, str):
                 if rank_zero_only.rank == 0:
                     log.info(f"Loading `transformers.CLIPVisionModel`: {model}")
-                return CLIPVisionModel.from_pretrained(model, *args, **kwargs)
+                repo_path = resolve_repo_path(
+                    model, repo_type="model", platform=self._platform
+                )
+                return CLIPVisionModel.from_pretrained(repo_path, *args, **kwargs)
             if isinstance(model, nn.Module):
                 if rank_zero_only.rank == 0:
                     log.info(f"Returning existing model: {model}")
                 return model
-        # If the model is not a string, we use the default load_model method
-        return super().load_model(model_name_or_config, *args, **kwargs)
+        else:
+            # If the model is not a string, we use the default load_model method
+            return super().load_model(model_name_or_config, *args, **kwargs)
     def load_train_dataset(self, dataset_name: str, *args, **kwargs):
         dataset_config = self._train_datasets[dataset_name]
@@ -123,7 +139,7 @@ class CLIPVisionModelPool(BaseModelPool):
                 log.info(
                     f"Loading train dataset using `datasets.load_dataset`: {dataset_config}"
                 )
-            dataset = load_dataset(dataset_config, split="train")
+            dataset = self._load_dataset(dataset_config, split="train")
         else:
             dataset = super().load_train_dataset(dataset_name, *args, **kwargs)
         return dataset
@@ -135,7 +151,7 @@ class CLIPVisionModelPool(BaseModelPool):
                 log.info(
                     f"Loading validation dataset using `datasets.load_dataset`: {dataset_config}"
                 )
-            dataset = load_dataset(dataset_config, split="validation")
+            dataset = self._load_dataset(dataset_config, split="validation")
         else:
             dataset = super().load_val_dataset(dataset_name, *args, **kwargs)
         return dataset
@@ -147,7 +163,24 @@ class CLIPVisionModelPool(BaseModelPool):
                 log.info(
                     f"Loading test dataset using `datasets.load_dataset`: {dataset_config}"
                 )
-            dataset = load_dataset(dataset_config, split="test")
+            dataset = self._load_dataset(dataset_config, split="test")
         else:
             dataset = super().load_test_dataset(dataset_name, *args, **kwargs)
         return dataset
+    def _load_dataset(self, name: str, split: str):
+        """
+        Load a dataset by its name and split.
+        Args:
+            dataset_name (str): The name of the dataset.
+            split (str): The split of the dataset to load (e.g., "train", "validation", "test").
+        Returns:
+            Dataset: The loaded dataset.
+        """
+        datset_dir = resolve_repo_path(
+            name, repo_type="dataset", platform=self._platform
+        )
+        dataset = load_dataset(datset_dir, split=split)
+        return dataset

fusion_bench/scripts/cli.py CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-This is the CLI script that is executed when the user runs the `fusion-bench` command.
+This is the CLI script that is executed when the user runs the `fusion_bench` command.
 The script is responsible for parsing the command-line arguments, loading the configuration file, and running the fusion algorithm.
 """

fusion-bench 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl

fusion-bench 0.2.18__py3-none-any.whl → 0.2.20__py3-none-any.whl