fusion-bench 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. fusion_bench/__init__.py +152 -42
  2. fusion_bench/dataset/__init__.py +27 -4
  3. fusion_bench/dataset/clip_dataset.py +2 -2
  4. fusion_bench/method/__init__.py +18 -1
  5. fusion_bench/method/classification/__init__.py +27 -2
  6. fusion_bench/method/classification/image_classification_finetune.py +214 -0
  7. fusion_bench/method/ensemble.py +17 -2
  8. fusion_bench/method/linear/__init__.py +6 -2
  9. fusion_bench/method/linear/{simple_average_for_llama.py → simple_average_for_causallm.py} +8 -4
  10. fusion_bench/method/linear/{task_arithmetic_for_llama.py → task_arithmetic_for_causallm.py} +22 -12
  11. fusion_bench/method/linear/ties_merging_for_causallm.py +70 -0
  12. fusion_bench/method/opcm/opcm.py +1 -0
  13. fusion_bench/method/pwe_moe/module.py +0 -2
  14. fusion_bench/method/simple_average.py +2 -2
  15. fusion_bench/method/tall_mask/task_arithmetic.py +2 -2
  16. fusion_bench/method/task_arithmetic/task_arithmetic.py +35 -10
  17. fusion_bench/method/ties_merging/ties_merging.py +22 -6
  18. fusion_bench/method/wudi/__init__.py +1 -0
  19. fusion_bench/method/wudi/wudi.py +105 -0
  20. fusion_bench/mixins/__init__.py +2 -0
  21. fusion_bench/mixins/lightning_fabric.py +4 -0
  22. fusion_bench/mixins/pyinstrument.py +174 -0
  23. fusion_bench/mixins/serialization.py +25 -78
  24. fusion_bench/mixins/simple_profiler.py +106 -23
  25. fusion_bench/modelpool/__init__.py +2 -0
  26. fusion_bench/modelpool/base_pool.py +77 -14
  27. fusion_bench/modelpool/causal_lm/causal_lm.py +32 -10
  28. fusion_bench/modelpool/clip_vision/modelpool.py +56 -19
  29. fusion_bench/modelpool/resnet_for_image_classification.py +208 -0
  30. fusion_bench/models/__init__.py +35 -9
  31. fusion_bench/models/hf_clip.py +4 -0
  32. fusion_bench/models/hf_utils.py +2 -1
  33. fusion_bench/models/model_card_templates/default.md +8 -1
  34. fusion_bench/models/wrappers/ensemble.py +136 -7
  35. fusion_bench/optim/__init__.py +40 -2
  36. fusion_bench/optim/lr_scheduler/__init__.py +27 -1
  37. fusion_bench/optim/muon.py +339 -0
  38. fusion_bench/programs/__init__.py +2 -0
  39. fusion_bench/programs/fabric_fusion_program.py +2 -2
  40. fusion_bench/programs/fusion_program.py +271 -0
  41. fusion_bench/scripts/cli.py +2 -2
  42. fusion_bench/taskpool/clip_vision/taskpool.py +11 -4
  43. fusion_bench/tasks/clip_classification/__init__.py +15 -0
  44. fusion_bench/utils/__init__.py +167 -21
  45. fusion_bench/utils/devices.py +30 -8
  46. fusion_bench/utils/lazy_imports.py +91 -12
  47. fusion_bench/utils/lazy_state_dict.py +58 -5
  48. fusion_bench/utils/misc.py +104 -13
  49. fusion_bench/utils/packages.py +4 -0
  50. fusion_bench/utils/path.py +7 -0
  51. fusion_bench/utils/pylogger.py +6 -0
  52. fusion_bench/utils/rich_utils.py +8 -3
  53. fusion_bench/utils/state_dict_arithmetic.py +935 -162
  54. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.25.dist-info}/METADATA +10 -3
  55. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.25.dist-info}/RECORD +76 -55
  56. fusion_bench_config/method/classification/image_classification_finetune.yaml +16 -0
  57. fusion_bench_config/method/classification/image_classification_finetune_test.yaml +6 -0
  58. fusion_bench_config/method/ensemble/simple_ensemble.yaml +1 -0
  59. fusion_bench_config/method/linear/{simple_average_for_llama.yaml → simple_average_for_causallm.yaml} +1 -1
  60. fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +4 -0
  61. fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +13 -0
  62. fusion_bench_config/method/wudi/wudi.yaml +4 -0
  63. fusion_bench_config/model_fusion.yaml +45 -0
  64. fusion_bench_config/modelpool/CausalLMPool/{Qwen2.5-1.5B_math_and_coder.yaml → Qwen2.5-1.5B_math_and_code.yaml} +1 -2
  65. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml +11 -0
  66. fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml +11 -0
  67. fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet152_cifar10.yaml +14 -0
  68. fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet152_cifar100.yaml +14 -0
  69. fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet18_cifar10.yaml +14 -0
  70. fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet18_cifar100.yaml +14 -0
  71. fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet50_cifar10.yaml +14 -0
  72. fusion_bench_config/modelpool/ResNetForImageClassfication/transformers/resnet50_cifar100.yaml +14 -0
  73. fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +0 -4
  74. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.25.dist-info}/WHEEL +0 -0
  75. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.25.dist-info}/entry_points.txt +0 -0
  76. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.25.dist-info}/licenses/LICENSE +0 -0
  77. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.25.dist-info}/top_level.txt +0 -0
fusion_bench/modelpool/resnet_for_image_classification.py
@@ -0,0 +1,208 @@
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Literal,
+    Optional,
+    TypeVar,
+    Union,
+    override,
+)
+
+import torch
+from omegaconf import DictConfig
+from torch import nn
+
+from fusion_bench import BaseModelPool, auto_register_config, get_rankzero_logger
+from fusion_bench.tasks.clip_classification import get_classnames, get_num_classes
+
+if TYPE_CHECKING:
+    from torchvision.models import ResNet as TorchVisionResNet
+
+log = get_rankzero_logger(__name__)
+
+
+def load_torchvision_resnet(
+    model_name: str, weights: Optional[str], num_classes: Optional[int]
+) -> "TorchVisionResNet":
+    import torchvision.models
+
+    model_fn = getattr(torchvision.models, model_name)
+    model: "TorchVisionResNet" = model_fn(weights=weights)
+
+    if num_classes is not None:
+        model.fc = nn.Linear(model.fc.in_features, num_classes)
+
+    return model
+
+
+def load_transformers_resnet(
+    config_path: str, pretrained: bool, dataset_name: Optional[str]
+):
+    from transformers import AutoConfig, ResNetForImageClassification
+
+    if pretrained:
+        model = ResNetForImageClassification.from_pretrained(config_path)
+    else:
+        config = AutoConfig.from_pretrained(config_path)
+        model = ResNetForImageClassification(config)
+
+    if dataset_name is None:
+        return model
+
+    classnames = get_classnames(dataset_name)
+    id2label = {i: c for i, c in enumerate(classnames)}
+    label2id = {c: i for i, c in enumerate(classnames)}
+    model.config.id2label = id2label
+    model.config.label2id = label2id
+
+    model.classifier[1] = (
+        nn.Linear(
+            model.classifier[1].in_features,
+            len(classnames),
+        )
+        if model.config.num_labels > 0
+        else nn.Identity()
+    )
+    return model
+
+
+@auto_register_config
+class ResNetForImageClassificationPool(BaseModelPool):
+    def __init__(self, type: str, **kwargs):
+        super().__init__(**kwargs)
+        assert type in ["torchvision", "transformers"]
+
+    def load_processor(
+        self, stage: Literal["train", "val", "test"] = "test", *args, **kwargs
+    ):
+        if self.type == "torchvision":
+            from torchvision import transforms
+
+            to_tensor = transforms.ToTensor()
+            normalize = transforms.Normalize(
+                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+            )
+            if stage == "train":
+                train_transform = transforms.Compose(
+                    [
+                        transforms.RandomResizedCrop(224),
+                        transforms.RandomHorizontalFlip(),
+                        to_tensor,
+                        normalize,
+                    ]
+                )
+                return train_transform
+            else:
+                val_transform = transforms.Compose(
+                    [
+                        transforms.Resize(256),
+                        transforms.CenterCrop(224),
+                        to_tensor,
+                        normalize,
+                    ]
+                )
+                return val_transform
+
+        elif self.type == "transformers":
+            from transformers import AutoImageProcessor
+
+            if self.has_pretrained:
+                config_path = self._models["_pretrained_"].config_path
+            else:
+                for model_cfg in self._models.values():
+                    if isinstance(model_cfg, str):
+                        config_path = model_cfg
+                        break
+                    if "config_path" in model_cfg:
+                        config_path = model_cfg["config_path"]
+                        break
+            return AutoImageProcessor.from_pretrained(config_path)
+
+    @override
+    def load_model(self, model_name_or_config: Union[str, DictConfig], *args, **kwargs):
+        log.debug(f"Loading model: {model_name_or_config}", stacklevel=2)
+        if (
+            isinstance(model_name_or_config, str)
+            and model_name_or_config in self._models
+        ):
+            model_name_or_config = self._models[model_name_or_config]
+
+        if self.type == "torchvision":
+            from torchvision.models import (
+                resnet18,
+                resnet34,
+                resnet50,
+                resnet101,
+                resnet152,
+            )
+
+            match model_name_or_config:
+                case "resnet18":
+                    model = resnet18()
+                case "resnet34":
+                    model = resnet34()
+                case "resnet50":
+                    model = resnet50()
+                case "resnet101":
+                    model = resnet101()
+                case "resnet152":
+                    model = resnet152()
+                case dict() | DictConfig() as model_config:
+                    if "dataset_name" in model_config:
+                        num_classes = get_num_classes(model_config["dataset_name"])
+                        if "num_classes" in model_config:
+                            assert (
+                                num_classes == model_config["num_classes"]
+                            ), f"num_classes mismatch: {num_classes} vs {model_config['num_classes']}"
+                    elif "num_classes" in model_config:
+                        num_classes = model_config["num_classes"]
+                    else:
+                        num_classes = None
+                    model = load_torchvision_resnet(
+                        model_name=model_config["model_name"],
+                        weights=model_config.get("weights", None),
+                        num_classes=num_classes,
+                    )
+                case _:
+                    raise ValueError(
+                        f"Invalid model_name_or_config type: {type(model_name_or_config)}"
+                    )
+        elif self.type == "transformers":
+            match model_name_or_config:
+                case str() as model_path:
+                    from transformers import AutoModelForImageClassification
+
+                    model = AutoModelForImageClassification.from_pretrained(model_path)
+                case dict() | DictConfig() as model_config:
+
+                    model = load_transformers_resnet(
+                        config_path=model_config["config_path"],
+                        pretrained=model_config.get("pretrained", False),
+                        dataset_name=model_config.get("dataset_name", None),
+                    )
+                case _:
+                    raise ValueError(
+                        f"Invalid model_name_or_config type: {type(model_name_or_config)}"
+                    )
+
+            # override forward to return logits only
+            original_forward = model.forward
+            model.forward = lambda pixel_values, **kwargs: original_forward(
+                pixel_values=pixel_values, **kwargs
+            ).logits
+            model.original_forward = original_forward
+        else:
+            raise ValueError(f"Unknown model type: {self.type}")
+        return model
+
+    @override
+    def save_model(self, model, path, *args, **kwargs):
+        if self.type == "torchvision":
+            torch.save(model.state_dict(), path)
+        elif self.type == "transformers":
+            model.save_pretrained(path)
+            self.load_processor().save_pretrained(path)
+        else:
+            raise ValueError(f"Unknown model type: {self.type}")
fusion_bench/models/__init__.py
@@ -1,10 +1,36 @@
 # flake8: noqa F401
-from fusion_bench.utils import LazyStateDict
-
-from . import separate_io, utils
-from .hf_utils import (
-    create_default_model_card,
-    load_model_card_template,
-    save_pretrained_with_remote_code,
-)
-from .parameter_dict import ParameterDictModel
+import sys
+from typing import TYPE_CHECKING
+
+from fusion_bench.utils.lazy_imports import LazyImporter
+
+from . import utils
+
+_extra_objects = {
+    "utils": utils,
+}
+_import_structure = {
+    "hf_utils": [
+        "create_default_model_card",
+        "load_model_card_template",
+        "save_pretrained_with_remote_code",
+    ],
+    "parameter_dict": ["ParameterDictModel"],
+    "separate_io": ["separate_load", "separate_save"],
+}
+
+if TYPE_CHECKING:
+    from .hf_utils import (
+        create_default_model_card,
+        load_model_card_template,
+        save_pretrained_with_remote_code,
+    )
+    from .parameter_dict import ParameterDictModel
+    from .separate_io import separate_load, separate_save
+else:
+    sys.modules[__name__] = LazyImporter(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        extra_objects=_extra_objects,
+    )
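For reference, this idiom replaces the package's module object in sys.modules so that each submodule is imported only on first attribute access, while TYPE_CHECKING keeps static analyzers happy. A generic sketch of the pattern (not fusion-bench's actual LazyImporter) looks like this:

import importlib
import types


class _LazyModule(types.ModuleType):
    """Illustrative lazy module: defers submodule imports to attribute access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # Map each exported attribute to the submodule that defines it.
        self._attr_to_module = {
            attr: submodule
            for submodule, attrs in import_structure.items()
            for attr in attrs
        }

    def __getattr__(self, attr):
        if attr not in self._attr_to_module:
            raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        submodule = importlib.import_module(
            f"{self.__name__}.{self._attr_to_module[attr]}"
        )
        value = getattr(submodule, attr)
        setattr(self, attr, value)  # cache so later lookups bypass __getattr__
        return value

# Usage inside a package __init__.py:
#   sys.modules[__name__] = _LazyModule(__name__, {"mezo": ["MeZO"]})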
fusion_bench/models/hf_clip.py
@@ -195,5 +195,9 @@ class HFCLIPClassifier(nn.Module):
            pass
        elif isinstance(image_embeds, BaseModelOutputWithPooling):
            image_embeds = image_embeds[1]
+        elif isinstance(image_embeds, dict) and "pooler_output" in image_embeds:
+            image_embeds = image_embeds["pooler_output"]
+        else:
+            raise ValueError("Unsupported output type from vision model outputs")
        image_embeds = self.clip_model.visual_projection(image_embeds)
        return image_embeds
fusion_bench/models/hf_utils.py
@@ -143,7 +143,7 @@ def save_pretrained_with_remote_code(

 def create_default_model_card(
     models: list[str],
-    *,
+    base_model: Optional[str] = None,
     title: str = "Deep Model Fusion",
     tags: list[str] = ["fusion-bench", "merge"],
     description=None,
@@ -154,6 +154,7 @@ def create_default_model_card(

     template: Template = Template(load_model_card_template("default.md"))
     card = template.render(
+        base_model=base_model,
         models=models,
         library_name="transformers",
         title=title,
fusion_bench/models/model_card_templates/default.md
@@ -1,5 +1,8 @@
 ---
 base_model:
+{%- if base_model is not none %}
+- {{ base_model }}
+{%- endif %}
 {%- for model in models %}
 - {{ model }}
 {%- endfor %}
@@ -18,7 +21,11 @@ tags:
 This is a merged model created using [fusion-bench](https://github.com/tanganke/fusion_bench).

 The following models were included in the merge:
-{% for model in models %}
+
+{% if base_model is not none %}
+- base model: {{ base_model }}
+{%- endif %}
+{%- for model in models %}
 - {{ model }}
 {%- endfor %}

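A quick standalone check of the template logic above with jinja2 (the model names are illustrative):

from jinja2 import Template

tpl = Template(
    "base_model:\n"
    "{%- if base_model is not none %}\n- {{ base_model }}\n{%- endif %}\n"
    "{%- for model in models %}\n- {{ model }}\n{%- endfor %}"
)
print(tpl.render(base_model="Qwen/Qwen2.5-1.5B", models=["model-a", "model-b"]))
# base_model:
# - Qwen/Qwen2.5-1.5B
# - model-a
# - model-b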
fusion_bench/models/wrappers/ensemble.py
@@ -1,10 +1,17 @@
-from typing import Any, Callable, Dict, List, Union, cast
+import logging
+from typing import Any, Callable, Dict, Generic, List, Union, cast

 import numpy as np
 import torch
+import torch.futures
 from omegaconf import ListConfig
 from torch import Tensor, nn

+from fusion_bench.utils.devices import to_device
+from fusion_bench.utils.type import TorchModelType
+
+log = logging.getLogger(__name__)
+

 def aggregate_tensors(
     outputs: List[Any], aggregate_fn: Callable
@@ -58,12 +65,16 @@ def aggregate_tensors(
         raise ValueError("Unsupported type for outputs")


-class EnsembleModule(nn.Module):
+class EnsembleModule(nn.Module, Generic[TorchModelType]):
     """
     Ensemble module that averages the outputs of multiple models.
     """

-    def __init__(self, models: List[nn.Module]):
+    def __init__(
+        self,
+        models: List[TorchModelType],
+        device_map: Dict[int, Union[int, str]] | None = None,
+    ):
@@ -73,6 +84,16 @@ class EnsembleModule(nn.Module):
         super().__init__()
         # TODO: distribute models to devices
         self.model_list = nn.ModuleList(models)
+        self.device_map = device_map
+        if self.device_map is not None:
+            self._move_models_to_devices()
+
+    def _move_models_to_devices(self):
+        for model_idx, device_id in self.device_map.items():
+            log.info(f"Moving model {model_idx} to device {device_id}")
+            self.model_list[model_idx] = self.model_list[model_idx].to(
+                device_id, non_blocking=True
+            )

     def _aggregate_tensors(self, outputs: List[Tensor]) -> Tensor:
         """
@@ -86,6 +107,49 @@ class EnsembleModule(nn.Module):
         """
         return torch.stack(outputs).mean(dim=0)

+    def _parallel_forward_with_device_map(self, *args: Any, **kwargs: Any) -> List[Any]:
+        """
+        Performs parallel forward pass using device mapping with futures.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Returns:
+            List[Any]: List of outputs from all models, all moved to the same device.
+        """
+        futures = []
+
+        device_data_cache = {}
+        for i, model in enumerate(self.model_list):
+            device_id = self.device_map.get(i, "cpu")
+
+            if device_id not in device_data_cache:
+                # Move inputs to the same device as the model
+                device_args = to_device(
+                    args, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_kwargs = to_device(
+                    kwargs, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_data_cache[device_id] = (device_args, device_kwargs)
+            else:
+                device_args, device_kwargs = device_data_cache[device_id]
+
+            # Create a future for asynchronous execution
+            future = torch.jit.fork(model, *device_args, **device_kwargs)
+            futures.append(future)
+
+        # Wait for all futures to complete and collect results
+        outputs = [torch.jit.wait(future) for future in futures]
+
+        # Move all outputs to the same device (use the device of the first model or cpu as fallback)
+        target_device = self.device_map.get(0, "cpu") if self.device_map else "cpu"
+        outputs = [
+            to_device(output, target_device, non_blocking=True) for output in outputs
+        ]
+        return outputs
+
     def forward(self, *args: Any, **kwargs: Any) -> Any:
         """
         Performs a forward pass by averaging the outputs of the models.
@@ -97,20 +161,25 @@ class EnsembleModule(nn.Module):
         Returns:
             Aggregated output from the ensemble of models.
         """
-        outputs = [model(*args, **kwargs) for model in self.model_list]
+        if self.device_map is None:
+            outputs = [model(*args, **kwargs) for model in self.model_list]
+        else:
+            # Parallel execution with device mapping
+            outputs = self._parallel_forward_with_device_map(*args, **kwargs)
         return aggregate_tensors(outputs, self._aggregate_tensors)


-class WeightedEnsembleModule(nn.Module):
+class WeightedEnsembleModule(nn.Module, Generic[TorchModelType]):
     """
     Ensemble module that computes a weighted average of the outputs from multiple models.
     """

     def __init__(
         self,
-        models: List[nn.Module],
+        models: List[TorchModelType],
         weights: List[float] | Tensor | np.ndarray,
         normalize: bool = True,
+        device_map: Dict[int, Union[int, str]] | None = None,
     ):
         """
         Initializes the WeightedEnsembleModule with models and their corresponding weights.
@@ -119,9 +188,12 @@ class WeightedEnsembleModule(nn.Module):
             models (List[nn.Module]): List of models to ensemble.
             weights (List[float] | Tensor | np.ndarray): Weights for each model.
             normalize (bool, optional): If True, normalizes the weights. Defaults to True.
+            device_map (Dict[int, Union[int, str]] | None, optional): Device mapping for parallel execution. Defaults to None.
         """
         super().__init__()
         self.model_list = nn.ModuleList(models)
+        self.device_map = device_map
+
         if isinstance(weights, (list, tuple, ListConfig)):
             weights = torch.tensor(weights)
         elif isinstance(weights, Tensor):
@@ -139,6 +211,17 @@ class WeightedEnsembleModule(nn.Module):
             weights = weights / weights.sum()
         self.register_buffer("weights", weights)

+        if self.device_map is not None:
+            self._move_models_to_devices()
+
+    def _move_models_to_devices(self):
+        """Move models to their assigned devices according to device_map."""
+        for model_idx, device_id in self.device_map.items():
+            log.info(f"Moving model {model_idx} to device {device_id}")
+            self.model_list[model_idx] = self.model_list[model_idx].to(
+                device_id, non_blocking=True
+            )
+
     def _aggregate_tensors(self, outputs: List[Tensor]) -> Tensor:
         """
         Aggregates a list of tensors using the provided weights.
@@ -152,6 +235,48 @@ class WeightedEnsembleModule(nn.Module):
         weights = cast(Tensor, self.weights).view(-1, *([1] * outputs[0].dim()))
         return (torch.stack(outputs) * weights).sum(dim=0)

+    def _parallel_forward_with_device_map(self, *args: Any, **kwargs: Any) -> List[Any]:
+        """
+        Performs parallel forward pass using device mapping with futures.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Returns:
+            List[Any]: List of outputs from all models, all moved to the same device.
+        """
+        futures = []
+
+        device_data_cache = {}
+        for i, model in enumerate(self.model_list):
+            device_id = self.device_map.get(i, "cpu")
+
+            if device_id not in device_data_cache:
+                # Move inputs to the same device as the model
+                device_args = to_device(
+                    args, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_kwargs = to_device(
+                    kwargs, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_data_cache[device_id] = (device_args, device_kwargs)
+            else:
+                device_args, device_kwargs = device_data_cache[device_id]
+
+            # Create a future for asynchronous execution
+            future = torch.jit.fork(model, *device_args, **device_kwargs)
+            futures.append(future)
+
+        # Wait for all futures to complete and collect results
+        outputs = [torch.jit.wait(future) for future in futures]
+
+        # Move all outputs to the same device (use the device of the first model or cpu as fallback)
+        target_device = self.device_map.get(0, "cpu") if self.device_map else "cpu"
+        outputs = [to_device(output, target_device) for output in outputs]
+
+        return outputs
+
     def forward(self, *args: Any, **kwargs: Any) -> Any:
         """
         Performs a forward pass by computing the weighted average of the models' outputs.
@@ -163,7 +288,11 @@ class WeightedEnsembleModule(nn.Module):
         Returns:
             Weighted aggregated output from the ensemble of models.
         """
-        outputs = [model(*args, **kwargs) for model in self.model_list]
+        if self.device_map is None:
+            outputs = [model(*args, **kwargs) for model in self.model_list]
+        else:
+            # Parallel execution with device mapping
+            outputs = self._parallel_forward_with_device_map(*args, **kwargs)
         return aggregate_tensors(outputs, self._aggregate_tensors)


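A hedged usage sketch of the new device_map option (assumes two CUDA devices are available; with device_map unset, forward falls back to the old sequential loop):

import torch
from torch import nn

from fusion_bench.models.wrappers.ensemble import EnsembleModule

# Two small models pinned to different GPUs via the new device_map argument.
models = [nn.Linear(16, 4) for _ in range(2)]
ensemble = EnsembleModule(models, device_map={0: "cuda:0", 1: "cuda:1"})

x = torch.randn(8, 16)
y = ensemble(x)  # forwards run via torch.jit.fork; outputs are gathered on
                 # the device of model 0 and averaged
print(y.shape)   # torch.Size([8, 4])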
fusion_bench/optim/__init__.py
@@ -1,2 +1,40 @@
-from . import exception, lr_scheduler
-from .mezo import MeZO
+import sys
+from typing import TYPE_CHECKING
+
+from fusion_bench.utils.lazy_imports import LazyImporter
+
+from . import lr_scheduler
+
+_extra_objects = {
+    "lr_scheduler": lr_scheduler,
+}
+_import_structure = {
+    "exception": [
+        "NoClosureError",
+        "NoSparseGradientError",
+        "NegativeLRError",
+        "NegativeStepError",
+        "ZeroParameterSizeError",
+    ],
+    "mezo": ["MeZO"],
+    "muon": ["Muon"],
+}
+
+if TYPE_CHECKING:
+    from .exception import (
+        NegativeLRError,
+        NegativeStepError,
+        NoClosureError,
+        NoSparseGradientError,
+        ZeroParameterSizeError,
+    )
+    from .mezo import MeZO
+    from .muon import Muon
+
+else:
+    sys.modules[__name__] = LazyImporter(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        extra_objects=_extra_objects,
+    )
fusion_bench/optim/lr_scheduler/__init__.py
@@ -1 +1,27 @@
-from .linear_warmup import *
+import sys
+from typing import TYPE_CHECKING
+
+from fusion_bench.utils.lazy_imports import LazyImporter
+
+_import_structure = {
+    "linear_warmup": [
+        "BaseLinearWarmupScheduler",
+        "LinearWarmupScheduler",
+        "CosineDecayWithWarmup",
+        "PolySchedulerWithWarmup",
+    ],
+}
+
+if TYPE_CHECKING:
+    from .linear_warmup import (
+        BaseLinearWarmupScheduler,
+        CosineDecayWithWarmup,
+        LinearWarmupScheduler,
+        PolySchedulerWithWarmup,
+    )
+else:
+    sys.modules[__name__] = LazyImporter(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+    )
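With both lazy __init__ rewrites above, downstream imports are unchanged; the submodules are simply resolved on first access (sketch; assumes the package is installed):

# Importing a name triggers the import of its defining submodule, e.g.
# accessing Muon pulls in the new fusion_bench.optim.muon module.
from fusion_bench.optim import MeZO, Muon
from fusion_bench.optim.lr_scheduler import CosineDecayWithWarmup

optimizer_cls = Muon  # fusion_bench.optim.muon has been imported at this point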