fusion-bench 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. fusion_bench/method/__init__.py +8 -0
  2. fusion_bench/method/ensemble.py +17 -2
  3. fusion_bench/method/linear/__init__.py +6 -2
  4. fusion_bench/method/linear/{simple_average_for_llama.py → simple_average_for_causallm.py} +8 -4
  5. fusion_bench/method/linear/{task_arithmetic_for_llama.py → task_arithmetic_for_causallm.py} +22 -12
  6. fusion_bench/method/linear/ties_merging_for_causallm.py +70 -0
  7. fusion_bench/method/simple_average.py +2 -2
  8. fusion_bench/method/task_arithmetic/task_arithmetic.py +35 -10
  9. fusion_bench/method/ties_merging/ties_merging.py +22 -6
  10. fusion_bench/method/wudi/__init__.py +1 -0
  11. fusion_bench/method/wudi/wudi.py +105 -0
  12. fusion_bench/mixins/lightning_fabric.py +4 -0
  13. fusion_bench/mixins/serialization.py +25 -78
  14. fusion_bench/modelpool/causal_lm/causal_lm.py +32 -10
  15. fusion_bench/models/hf_clip.py +4 -0
  16. fusion_bench/models/hf_utils.py +2 -1
  17. fusion_bench/models/model_card_templates/default.md +8 -1
  18. fusion_bench/models/wrappers/ensemble.py +136 -7
  19. fusion_bench/scripts/cli.py +2 -2
  20. fusion_bench/taskpool/clip_vision/taskpool.py +11 -4
  21. fusion_bench/utils/devices.py +30 -8
  22. fusion_bench/utils/lazy_state_dict.py +3 -0
  23. fusion_bench/utils/rich_utils.py +7 -3
  24. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/METADATA +10 -3
  25. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/RECORD +37 -30
  26. fusion_bench_config/method/ensemble/simple_ensemble.yaml +1 -0
  27. fusion_bench_config/method/linear/{simple_average_for_llama.yaml → simple_average_for_causallm.yaml} +1 -1
  28. fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +4 -0
  29. fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +13 -0
  30. fusion_bench_config/method/wudi/wudi.yaml +4 -0
  31. fusion_bench_config/modelpool/CausalLMPool/{Qwen2.5-1.5B_math_and_coder.yaml → Qwen2.5-1.5B_math_and_code.yaml} +1 -2
  32. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_three_models.yaml +11 -0
  33. fusion_bench_config/modelpool/CausalLMPool/llama-7b_3-models_v1.yaml +11 -0
  34. fusion_bench_config/method/linear/task_arithmetic_for_llama.yaml +0 -4
  35. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/WHEEL +0 -0
  36. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/entry_points.txt +0 -0
  37. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/licenses/LICENSE +0 -0
  38. {fusion_bench-0.2.23.dist-info → fusion_bench-0.2.24.dist-info}/top_level.txt +0 -0
fusion_bench/mixins/serialization.py

@@ -6,6 +6,7 @@ from inspect import Parameter, _ParameterKind
 from pathlib import Path
 from typing import Dict, Mapping, Optional, Union
 
+from bidict import MutableBidict, bidict
 from omegaconf import DictConfig, OmegaConf
 
 from fusion_bench.constants import FUSION_BENCH_VERSION
@@ -15,22 +16,29 @@ from fusion_bench.utils.instantiate_utils import set_print_function_call
 log = logging.getLogger(__name__)
 
 __all__ = [
-    "YAMLSerializationMixin",
     "auto_register_config",
+    "YAMLSerializationMixin",
    "BaseYAMLSerializable",
 ]
 
 
-def _get_attr_name(config_mapping: Mapping[str, str], param_name):
-    for attr_name, p in config_mapping.items():
-        if p == param_name:
-            return attr_name
-    else:
-        raise ValueError(f"Parameter {param_name} not found in config mapping.")
+def _set_attr(self, param_name: str, value):
+    """
+    Set an attribute on the object using the parameter name from config mapping.
+
+    This function looks up the corresponding attribute name for the given parameter
+    name using the object's _config_mapping, then sets that attribute to the
+    specified value. It also logs the operation for debugging purposes.
 
+    Args:
+        self: The object instance to set the attribute on.
+        param_name (str): The parameter name (config key) to map to an attribute.
+        value: The value to assign to the attribute.
 
-def _set_attr(self, param_name: str, value):
-    attr_name = _get_attr_name(self._config_mapping, param_name)
+    Raises:
+        ValueError: If the parameter name is not found in the config mapping.
+    """
+    attr_name = self._config_mapping.inverse[param_name]
     log.debug(f"set {attr_name} to {value}. Parameter name: {param_name}")
     setattr(self, attr_name, value)
 
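The linear scan in the removed `_get_attr_name` becomes a constant-time lookup on bidict's inverse view. A minimal sketch of the lookup, with illustrative names that are not part of the diff (note that bidict raises `KeyError`, not `ValueError`, on a missing key):

```python
from bidict import bidict

# attribute name -> config option name, kept one-to-one in both directions
config_mapping = bidict({"hyper_parameter_1": "hyper_param_1"})

assert config_mapping["hyper_parameter_1"] == "hyper_param_1"          # attr -> param
assert config_mapping.inverse["hyper_param_1"] == "hyper_parameter_1"  # param -> attr
```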
@@ -59,37 +67,16 @@ def auto_register_config(cls):
         functionality and modified __init__ behavior.
 
     Behavior:
-        - **Parameter Registration**: All non-variadic parameters (excluding *args, **kwargs)
+        - **Parameter Registration**: All non-variadic parameters (excluding ``*args``, ``**kwargs``)
           from the __init__ method are automatically added to _config_mapping
         - **Positional Arguments**: Handled in order and mapped to corresponding parameter names
         - **Keyword Arguments**: Processed after positional arguments, overriding any conflicts
         - **Default Values**: Applied when parameters are not provided via arguments
         - **Attribute Setting**: All parameters become instance attributes accessible via dot notation
 
-    Example:
-        ```python
-        @auto_register_config
-        class MyAlgorithm(BaseYAMLSerializable):
-            def __init__(self, learning_rate: float = 0.001, batch_size: int = 32, model_name: str = "default", **kwargs):
-                super().__init__(**kwargs)
-
-        # All instantiation methods work automatically:
-        algo1 = MyAlgorithm(0.01, 64)  # positional args
-        algo2 = MyAlgorithm(learning_rate=0.01, model_name="bert")  # keyword args
-        algo3 = MyAlgorithm(0.01, batch_size=128, model_name="gpt")  # mixed args
-
-        # Attributes are automatically set and can be serialized:
-        print(algo1.learning_rate)  # 0.01
-        print(algo1.batch_size)  # 64
-        print(algo1.model_name)  # "default" (from default value)
-
-        config = algo1.config
-        # DictConfig({'_target_': 'MyAlgorithm', 'learning_rate': 0.01, 'batch_size': 64, 'model_name': 'default'})
-        ```
-
     Note:
         - The decorator wraps the original __init__ method while preserving its signature for IDE support
-        - Parameters with *args or **kwargs signatures are ignored during registration
+        - Parameters with ``*args`` or ``**kwargs`` signatures are ignored during registration
         - The attributes are auto-registered, then the original __init__ method is called,
         - Type hints, method name, and other metadata are preserved using functools.wraps
         - This decorator is designed to work seamlessly with the YAML serialization system
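For reference, a short usage sketch of the decorator, adapted from the example this release removes from the docstring above (class and parameter names are illustrative):

```python
@auto_register_config
class MyAlgorithm(BaseYAMLSerializable):
    def __init__(self, learning_rate: float = 0.001, batch_size: int = 32, **kwargs):
        super().__init__(**kwargs)

# Positional and keyword arguments both register as attributes.
algo = MyAlgorithm(0.01, batch_size=64)
print(algo.learning_rate)  # 0.01 -- set before the original __init__ runs
print(algo.config)
# DictConfig({'_target_': 'MyAlgorithm', 'learning_rate': 0.01, 'batch_size': 64})
```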
@@ -103,7 +90,10 @@ def auto_register_config(cls):
 
     # Auto-register parameters in _config_mapping
     if not "_config_mapping" in cls.__dict__:
-        cls._config_mapping = deepcopy(getattr(cls, "_config_mapping", {}))
+        cls._config_mapping = deepcopy(getattr(cls, "_config_mapping", bidict()))
+    if not isinstance(cls._config_mapping, bidict):
+        cls._config_mapping = bidict(cls._config_mapping)
+
     registered_parameters = tuple(cls._config_mapping.values())
 
     for param_name in list(sig.parameters.keys())[1:]:  # Skip 'self'
@@ -116,6 +106,7 @@ def auto_register_config(cls):
         ) and (param_name not in registered_parameters):
             cls._config_mapping[param_name] = param_name
 
+    @wraps(original_init)
     def __init__(self, *args, **kwargs):
         log.debug(f"set attributes for {self.__class__.__name__} in {cls.__name__}")
         # auto-register the attributes based on the signature
@@ -162,33 +153,10 @@ def auto_register_config(cls):
 
 class YAMLSerializationMixin:
     _config_key: Optional[str] = None
-    _config_mapping: Dict[str, str] = {}
+    _config_mapping: MutableBidict[str, str] = bidict()
     R"""
     `_config_mapping` is a dictionary mapping the attribute names of the class to the config option names. This is used to convert the class to a DictConfig.
 
-    For example, if an algorithm class is defined as follows:
-
-    ```python
-    class SomeModelFusionAlgorithm(BaseModelFusionAlgorithm):
-        hyper_parameter_1 = None
-        hyper_parameter_2 = None
-
-        _config_mapping = BaseModelFusionAlgorithm._config_mapping | {
-            "hyper_parameter_1": "hyper_param_1",
-            "hyper_parameter_2": "hyper_param_2",
-        }
-
-        def __init__(self, hyper_param_1: int, hyper_param_2: int):
-            self.hyper_parameter_1 = hyper_param_1
-            self.hyper_parameter_2 = hyper_param_2
-            super().__init__()
-    ```
-
-    The model pool will be converted to a DictConfig as follows:
-
-    ```python
-    algorithm = SomeModelFusionAlgorithm(hyper_param_1=1, hyper_param_2=2)
-    ```
-
     >>> algorithm.config
     DictConfig({'_target_': 'SomeModelFusionAlgorithm', 'hyper_param_1': 1, 'hyper_param_2': 2})
 
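With `_config_mapping` now a bidict, subclass mappings must stay one-to-one in both directions. A hedged sketch of the subclassing pattern from the removed docstring example, updated for bidict (names are illustrative):

```python
from bidict import bidict

class SomeModelFusionAlgorithm(BaseModelFusionAlgorithm):
    # Extend the parent mapping; a duplicate config option name would raise
    # bidict.ValueDuplicationError, since the inverse view must stay unique.
    _config_mapping = bidict(
        {
            **BaseModelFusionAlgorithm._config_mapping,
            "hyper_parameter_1": "hyper_param_1",
            "hyper_parameter_2": "hyper_param_2",
        }
    )

    def __init__(self, hyper_param_1: int, hyper_param_2: int):
        self.hyper_parameter_1 = hyper_param_1
        self.hyper_parameter_2 = hyper_param_2
        super().__init__()
```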
@@ -207,17 +175,6 @@ class YAMLSerializationMixin:
         This property converts the model pool instance into a dictionary
         configuration, which can be used for serialization or other purposes.
 
-        Example:
-
-        ```python
-        model = SomeModelFusionAlgorithm(hyper_param_1=1, hyper_param_2=2)
-        config = model.config
-        print(config)
-        # DictConfig({'_target_': 'SomeModelFusionAlgorithm', 'hyper_param_1': 1, 'hyper_param_2': 2})
-        ```
-
-        This is useful for serializing the object to a YAML file or for debugging.
-
         Returns:
             DictConfig: The configuration of the model pool.
         """
@@ -282,16 +239,6 @@ class YAMLSerializationMixin:
                 serialization. This is how the attribute will appear in YAML output.
             value: The value to assign to the attribute.
 
-        Example:
-            ```python
-            model = BaseYAMLSerializable()
-            model.set_option("learning_rate", "lr", 0.001)
-
-            # This sets model.learning_rate = 0.001
-            # and maps it to "lr" in the config output
-            config = model.config
-            # config will contain: {"lr": 0.001, ...}
-            ```
         """
         setattr(self, attr_name, value)
         self._config_mapping[attr_name] = param_name
fusion_bench/modelpool/causal_lm/causal_lm.py

@@ -1,5 +1,5 @@
 """
-Online documentation for this module: https://tanganke.github.io/fusion_bench/modelpool/causal_lm
+Online documentation for this module: https://tanganke.github.io/fusion_bench/modelpool/llm
 """
 
 import logging
@@ -26,6 +26,7 @@ from fusion_bench import (
     instantiate,
     parse_dtype,
 )
+from fusion_bench.models.hf_utils import create_default_model_card
 from fusion_bench.utils.lazy_state_dict import LazyStateDict
 
 log = logging.getLogger(__name__)
@@ -271,13 +272,16 @@ class CausalLMPool(BaseModelPool):
         save_tokenizer: bool = False,
         tokenizer_kwargs=None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
+        algorithm_config: Optional[DictConfig] = None,
+        description: Optional[str] = None,
+        base_model_in_modelcard: bool = True,
         **kwargs,
     ):
         """Save a model to the specified path with optional tokenizer and Hub upload.
 
         This method provides comprehensive model saving capabilities including
-        optional tokenizer saving, dtype conversion, and Hugging Face Hub upload.
-        The model is saved in the standard Hugging Face format.
+        optional tokenizer saving, dtype conversion, model card creation, and
+        Hugging Face Hub upload. The model is saved in the standard Hugging Face format.
 
         Args:
             model: The PreTrainedModel instance to be saved.
@@ -295,15 +299,13 @@ class CausalLMPool(BaseModelPool):
                 when save_tokenizer is True.
             tokenizer: Optional pre-loaded tokenizer instance. If provided, this
                 tokenizer will be saved regardless of the save_tokenizer flag.
+            algorithm_config: Optional DictConfig containing algorithm configuration.
+                If provided, a model card will be created with algorithm details.
+            description: Optional description for the model card. If not provided
+                and algorithm_config is given, a default description will be generated.
             **kwargs: Additional keyword arguments passed to the model's
                 save_pretrained method.
 
-        Side Effects:
-            - Creates model files in the specified directory
-            - Optionally creates tokenizer files in the same directory
-            - May convert the model to a different dtype
-            - May upload files to Hugging Face Hub
-
         Example:
             ```python
             >>> pool = CausalLMPool(models=..., tokenizer=...)
@@ -313,7 +315,9 @@ class CausalLMPool(BaseModelPool):
             ...     "/path/to/save",
             ...     save_tokenizer=True,
             ...     model_dtype="float16",
-            ...     push_to_hub=True
+            ...     push_to_hub=True,
+            ...     algorithm_config=algorithm_config,
+            ...     description="Custom merged model"
             ... )
             ```
         """
@@ -337,6 +341,24 @@ class CausalLMPool(BaseModelPool):
             **kwargs,
         )
 
+        # Create and save model card if algorithm_config is provided
+        if algorithm_config is not None:
+            if description is None:
+                description = "Model created using FusionBench."
+            model_card_str = create_default_model_card(
+                base_model=(
+                    self.get_model_path("_pretrained_")
+                    if base_model_in_modelcard and self.has_pretrained
+                    else None
+                ),
+                models=[self.get_model_path(m) for m in self.model_names],
+                description=description,
+                algorithm_config=algorithm_config,
+                modelpool_config=self.config,
+            )
+            with open(os.path.join(path, "README.md"), "w") as f:
+                f.write(model_card_str)
+
 
 class CausalLMBackbonePool(CausalLMPool):
     """A specialized model pool that loads only the transformer backbone layers.
fusion_bench/models/hf_clip.py

@@ -195,5 +195,9 @@ class HFCLIPClassifier(nn.Module):
             pass
         elif isinstance(image_embeds, BaseModelOutputWithPooling):
             image_embeds = image_embeds[1]
+        elif isinstance(image_embeds, dict) and "pooler_output" in image_embeds:
+            image_embeds = image_embeds["pooler_output"]
+        else:
+            raise ValueError("Unsupported output type from vision model outputs")
         image_embeds = self.clip_model.visual_projection(image_embeds)
         return image_embeds
fusion_bench/models/hf_utils.py

@@ -143,7 +143,7 @@ def save_pretrained_with_remote_code(
 
 def create_default_model_card(
     models: list[str],
-    *,
+    base_model: Optional[str] = None,
     title: str = "Deep Model Fusion",
     tags: list[str] = ["fusion-bench", "merge"],
     description=None,
@@ -154,6 +154,7 @@ def create_default_model_card(
 
     template: Template = Template(load_model_card_template("default.md"))
     card = template.render(
+        base_model=base_model,
         models=models,
         library_name="transformers",
         title=title,
fusion_bench/models/model_card_templates/default.md

@@ -1,5 +1,8 @@
 ---
 base_model:
+{%- if base_model is not none %}
+- {{ base_model }}
+{%- endif %}
 {%- for model in models %}
 - {{ model }}
 {%- endfor %}
@@ -18,7 +21,11 @@ tags:
 This is a merged model created using [fusion-bench](https://github.com/tanganke/fusion_bench).
 
 The following models were included in the merge:
-{% for model in models %}
+
+{% if base_model is not none %}
+- base model: {{ base_model }}
+{%- endif %}
+{%- for model in models %}
 - {{ model }}
 {%- endfor %}
 
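The new conditional front matter can be sanity-checked in isolation. A minimal jinja2 sketch with the template inlined (not the packaged default.md; model names are placeholders):

```python
from jinja2 import Template

template = Template(
    "base_model:\n"
    "{%- if base_model is not none %}\n"
    "- {{ base_model }}\n"
    "{%- endif %}\n"
    "{%- for model in models %}\n"
    "- {{ model }}\n"
    "{%- endfor %}"
)
print(template.render(base_model="Qwen/Qwen2.5-1.5B", models=["a/math", "b/code"]))
# base_model:
# - Qwen/Qwen2.5-1.5B
# - a/math
# - b/code
```

When `base_model` is None, the `{%- if %}` block collapses entirely, so the rendered list is unchanged from the previous release.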
fusion_bench/models/wrappers/ensemble.py

@@ -1,10 +1,17 @@
-from typing import Any, Callable, Dict, List, Union, cast
+import logging
+from typing import Any, Callable, Dict, Generic, List, Union, cast
 
 import numpy as np
 import torch
+import torch.futures
 from omegaconf import ListConfig
 from torch import Tensor, nn
 
+from fusion_bench.utils.devices import to_device
+from fusion_bench.utils.type import TorchModelType
+
+log = logging.getLogger(__name__)
+
 
 def aggregate_tensors(
     outputs: List[Any], aggregate_fn: Callable
@@ -58,12 +65,16 @@ def aggregate_tensors(
     raise ValueError("Unsupported type for outputs")
 
 
-class EnsembleModule(nn.Module):
+class EnsembleModule(nn.Module, Generic[TorchModelType]):
     """
     Ensemble module that averages the outputs of multiple models.
     """
 
-    def __init__(self, models: List[nn.Module]):
+    def __init__(
+        self,
+        models: List[TorchModelType],
+        device_map: Dict[int, Union[int, str]] | None = None,
+    ):
         """
         Initializes the EnsembleModule with a list of models.
 
@@ -73,6 +84,16 @@ class EnsembleModule(nn.Module):
         super().__init__()
         # TODO: distribute models to devices
         self.model_list = nn.ModuleList(models)
+        self.device_map = device_map
+        if self.device_map is not None:
+            self._move_models_to_devices()
+
+    def _move_models_to_devices(self):
+        for model_idx, device_id in self.device_map.items():
+            log.info(f"Moving model {model_idx} to device {device_id}")
+            self.model_list[model_idx] = self.model_list[model_idx].to(
+                device_id, non_blocking=True
+            )
 
     def _aggregate_tensors(self, outputs: List[Tensor]) -> Tensor:
         """
@@ -86,6 +107,49 @@ class EnsembleModule(nn.Module):
         """
         return torch.stack(outputs).mean(dim=0)
 
+    def _parallel_forward_with_device_map(self, *args: Any, **kwargs: Any) -> List[Any]:
+        """
+        Performs parallel forward pass using device mapping with futures.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Returns:
+            List[Any]: List of outputs from all models, all moved to the same device.
+        """
+        futures = []
+
+        device_data_cache = {}
+        for i, model in enumerate(self.model_list):
+            device_id = self.device_map.get(i, "cpu")
+
+            if device_id not in device_data_cache:
+                # Move inputs to the same device as the model
+                device_args = to_device(
+                    args, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_kwargs = to_device(
+                    kwargs, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_data_cache[device_id] = (device_args, device_kwargs)
+            else:
+                device_args, device_kwargs = device_data_cache[device_id]
+
+            # Create a future for asynchronous execution
+            future = torch.jit.fork(model, *device_args, **device_kwargs)
+            futures.append(future)
+
+        # Wait for all futures to complete and collect results
+        outputs = [torch.jit.wait(future) for future in futures]
+
+        # Move all outputs to the same device (use the device of the first model or cpu as fallback)
+        target_device = self.device_map.get(0, "cpu") if self.device_map else "cpu"
+        outputs = [
+            to_device(output, target_device, non_blocking=True) for output in outputs
+        ]
+        return outputs
+
     def forward(self, *args: Any, **kwargs: Any) -> Any:
         """
         Performs a forward pass by averaging the outputs of the models.
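The fork/wait pattern used above works in eager mode as well as TorchScript: `torch.jit.fork` schedules the call on an interop thread and returns a future, and `torch.jit.wait` blocks until the result is ready. A tiny self-contained sketch of the mechanism:

```python
import torch
from torch import nn

model = nn.Linear(4, 2)
x = torch.randn(1, 4)

# fork returns immediately with a Future; wait collects the result.
future = torch.jit.fork(model, x)
output = torch.jit.wait(future)
print(output.shape)  # torch.Size([1, 2])
```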
@@ -97,20 +161,25 @@ class EnsembleModule(nn.Module):
         Returns:
             Aggregated output from the ensemble of models.
         """
-        outputs = [model(*args, **kwargs) for model in self.model_list]
+        if self.device_map is None:
+            outputs = [model(*args, **kwargs) for model in self.model_list]
+        else:
+            # Parallel execution with device mapping
+            outputs = self._parallel_forward_with_device_map(*args, **kwargs)
         return aggregate_tensors(outputs, self._aggregate_tensors)
 
 
-class WeightedEnsembleModule(nn.Module):
+class WeightedEnsembleModule(nn.Module, Generic[TorchModelType]):
     """
     Ensemble module that computes a weighted average of the outputs from multiple models.
     """
 
     def __init__(
         self,
-        models: List[nn.Module],
+        models: List[TorchModelType],
         weights: List[float] | Tensor | np.ndarray,
         normalize: bool = True,
+        device_map: Dict[int, Union[int, str]] | None = None,
     ):
         """
         Initializes the WeightedEnsembleModule with models and their corresponding weights.
@@ -119,9 +188,12 @@ class WeightedEnsembleModule(nn.Module):
             models (List[nn.Module]): List of models to ensemble.
             weights (List[float] | Tensor | np.ndarray): Weights for each model.
             normalize (bool, optional): If True, normalizes the weights. Defaults to True.
+            device_map (Dict[int, Union[int, str]] | None, optional): Device mapping for parallel execution. Defaults to None.
         """
         super().__init__()
         self.model_list = nn.ModuleList(models)
+        self.device_map = device_map
+
         if isinstance(weights, (list, tuple, ListConfig)):
             weights = torch.tensor(weights)
         elif isinstance(weights, Tensor):
@@ -139,6 +211,17 @@ class WeightedEnsembleModule(nn.Module):
             weights = weights / weights.sum()
         self.register_buffer("weights", weights)
 
+        if self.device_map is not None:
+            self._move_models_to_devices()
+
+    def _move_models_to_devices(self):
+        """Move models to their assigned devices according to device_map."""
+        for model_idx, device_id in self.device_map.items():
+            log.info(f"Moving model {model_idx} to device {device_id}")
+            self.model_list[model_idx] = self.model_list[model_idx].to(
+                device_id, non_blocking=True
+            )
+
     def _aggregate_tensors(self, outputs: List[Tensor]) -> Tensor:
         """
         Aggregates a list of tensors using the provided weights.
@@ -152,6 +235,48 @@ class WeightedEnsembleModule(nn.Module):
         weights = cast(Tensor, self.weights).view(-1, *([1] * outputs[0].dim()))
         return (torch.stack(outputs) * weights).sum(dim=0)
 
+    def _parallel_forward_with_device_map(self, *args: Any, **kwargs: Any) -> List[Any]:
+        """
+        Performs parallel forward pass using device mapping with futures.
+
+        Args:
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+
+        Returns:
+            List[Any]: List of outputs from all models, all moved to the same device.
+        """
+        futures = []
+
+        device_data_cache = {}
+        for i, model in enumerate(self.model_list):
+            device_id = self.device_map.get(i, "cpu")
+
+            if device_id not in device_data_cache:
+                # Move inputs to the same device as the model
+                device_args = to_device(
+                    args, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_kwargs = to_device(
+                    kwargs, device_id, copy_on_move=True, non_blocking=True
+                )
+                device_data_cache[device_id] = (device_args, device_kwargs)
+            else:
+                device_args, device_kwargs = device_data_cache[device_id]
+
+            # Create a future for asynchronous execution
+            future = torch.jit.fork(model, *device_args, **device_kwargs)
+            futures.append(future)
+
+        # Wait for all futures to complete and collect results
+        outputs = [torch.jit.wait(future) for future in futures]
+
+        # Move all outputs to the same device (use the device of the first model or cpu as fallback)
+        target_device = self.device_map.get(0, "cpu") if self.device_map else "cpu"
+        outputs = [to_device(output, target_device) for output in outputs]
+
+        return outputs
+
     def forward(self, *args: Any, **kwargs: Any) -> Any:
         """
         Performs a forward pass by computing the weighted average of the models' outputs.
@@ -163,7 +288,11 @@ class WeightedEnsembleModule(nn.Module):
         Returns:
             Weighted aggregated output from the ensemble of models.
         """
-        outputs = [model(*args, **kwargs) for model in self.model_list]
+        if self.device_map is None:
+            outputs = [model(*args, **kwargs) for model in self.model_list]
+        else:
+            # Parallel execution with device mapping
+            outputs = self._parallel_forward_with_device_map(*args, **kwargs)
         return aggregate_tensors(outputs, self._aggregate_tensors)
 
 
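A hedged usage sketch of the new `device_map` argument, with toy models on CPU (on a multi-GPU machine the values would be e.g. "cuda:0", "cuda:1"; the import path assumes this wheel's module layout):

```python
import torch
from torch import nn

from fusion_bench.models.wrappers.ensemble import (
    EnsembleModule,
    WeightedEnsembleModule,
)

models = [nn.Linear(8, 2) for _ in range(3)]
x = torch.randn(4, 8)

# Without device_map the forward loop stays sequential, as before.
ensemble = EnsembleModule(models)
print(ensemble(x).shape)  # torch.Size([4, 2])

# With device_map, each model runs via torch.jit.fork on its assigned device;
# indices missing from the map fall back to "cpu".
weighted = WeightedEnsembleModule(
    models, weights=[0.5, 0.3, 0.2], device_map={0: "cpu", 1: "cpu", 2: "cpu"}
)
print(weighted(x).shape)  # torch.Size([4, 2])
```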
fusion_bench/scripts/cli.py

@@ -20,8 +20,8 @@ log = logging.getLogger(__name__)
 
 
 def _get_default_config_path():
-    for config_dir in ["fusion_bench_config", "config"]:
-        for config_path_root in [os.getcwd(), PROJECT_ROOT_PATH]:
+    for config_path_root in [os.getcwd(), PROJECT_ROOT_PATH]:
+        for config_dir in ["config", "fusion_bench_config"]:
             config_path = os.path.join(config_path_root, config_dir)
             if os.path.exists(config_path) and os.path.isdir(config_path):
                 return os.path.abspath(config_path)
fusion_bench/taskpool/clip_vision/taskpool.py

@@ -27,7 +27,7 @@ from tqdm.autonotebook import tqdm
 from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel
 from transformers.models.clip.modeling_clip import CLIPVisionTransformer
 
-from fusion_bench import RuntimeConstants
+from fusion_bench import RuntimeConstants, auto_register_config
 from fusion_bench.dataset import CLIPDataset
 from fusion_bench.mixins import HydraConfigMixin, LightningFabricMixin
 from fusion_bench.models.hf_clip import HFCLIPClassifier
@@ -86,6 +86,7 @@ class LayerWiseFeatureSaver:
         torch.save(features, self.save_path)
 
 
+@auto_register_config
 class CLIPVisionModelTaskPool(
     HydraConfigMixin,
     LightningFabricMixin,
@@ -134,11 +135,13 @@ class CLIPVisionModelTaskPool(
         layer_wise_feature_first_token_only: bool = True,
         layer_wise_feature_max_num: Optional[int] = None,
         fast_dev_run: Optional[bool] = None,
+        move_to_device: bool = True,
         **kwargs,
     ):
         """
         Initialize the CLIPVisionModelTaskPool.
         """
+        super().__init__(**kwargs)
         self._test_datasets = test_datasets
         self._processor = processor
         self._data_processor = data_processor
@@ -159,7 +162,6 @@ class CLIPVisionModelTaskPool(
             self.fast_dev_run = RuntimeConstants().debug
         else:
             self.fast_dev_run = fast_dev_run
-        super().__init__(**kwargs)
 
     def setup(self):
         """
@@ -220,7 +222,9 @@ class CLIPVisionModelTaskPool(
             for name, dataset in self.test_datasets.items()
         }
         self.test_dataloaders = {
-            name: self.fabric.setup_dataloaders(dataloader)
+            name: self.fabric.setup_dataloaders(
+                dataloader, move_to_device=self.move_to_device
+            )
             for name, dataloader in self.test_dataloaders.items()
         }
@@ -273,6 +277,8 @@ class CLIPVisionModelTaskPool(
                 task_name=task_name,
             )
             logits: Tensor = outputs["logits"]
+            if logits.device != targets.device:
+                targets = targets.to(logits.device)
 
             loss = F.cross_entropy(logits, targets)
             loss_metric.update(loss.detach().cpu())
@@ -321,7 +327,8 @@ class CLIPVisionModelTaskPool(
                 self.clip_model,
                 processor=self.processor,
             )
-        classifier = cast(HFCLIPClassifier, self.fabric.to_device(classifier))
+        if self.move_to_device:
+            classifier = cast(HFCLIPClassifier, self.fabric.to_device(classifier))
         # collect basic model information
         training_params, all_params = count_parameters(model)
         report["model_info"] = {