fusion-bench 0.2.28__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- fusion_bench/constants/__init__.py +5 -1
- fusion_bench/constants/runtime.py +111 -7
- fusion_bench/dataset/gsm8k.py +6 -2
- fusion_bench/dataset/image_corruption/make_corruption.py +168 -0
- fusion_bench/method/__init__.py +10 -2
- fusion_bench/method/base_algorithm.py +29 -19
- fusion_bench/method/classification/image_classification_finetune.py +1 -2
- fusion_bench/method/gossip/clip_task_wise_gossip.py +1 -29
- fusion_bench/metrics/model_kinship/__init__.py +2 -0
- fusion_bench/metrics/model_kinship/calculate.py +77 -0
- fusion_bench/metrics/model_kinship/calculate_split.py +171 -0
- fusion_bench/metrics/model_kinship/utility.py +184 -0
- fusion_bench/metrics/nyuv2/__init__.py +31 -0
- fusion_bench/metrics/nyuv2/depth.py +30 -0
- fusion_bench/metrics/nyuv2/loss.py +40 -0
- fusion_bench/metrics/nyuv2/noise.py +24 -0
- fusion_bench/metrics/nyuv2/normal.py +34 -1
- fusion_bench/metrics/nyuv2/segmentation.py +35 -1
- fusion_bench/mixins/clip_classification.py +30 -2
- fusion_bench/mixins/lightning_fabric.py +46 -5
- fusion_bench/mixins/rich_live.py +76 -0
- fusion_bench/modelpool/base_pool.py +86 -5
- fusion_bench/models/masks/mask_model.py +8 -2
- fusion_bench/models/open_clip/modeling.py +7 -0
- fusion_bench/models/wrappers/layer_wise_fusion.py +41 -3
- fusion_bench/models/wrappers/task_wise_fusion.py +14 -3
- fusion_bench/scripts/cli.py +14 -0
- fusion_bench/scripts/webui.py +250 -17
- fusion_bench/utils/__init__.py +14 -0
- fusion_bench/utils/data.py +100 -9
- fusion_bench/utils/devices.py +3 -1
- fusion_bench/utils/fabric.py +185 -4
- fusion_bench/utils/instantiate_utils.py +29 -18
- fusion_bench/utils/json.py +6 -0
- fusion_bench/utils/misc.py +16 -0
- fusion_bench/utils/rich_utils.py +123 -6
- fusion_bench/utils/validation.py +197 -0
- {fusion_bench-0.2.28.dist-info → fusion_bench-0.2.30.dist-info}/METADATA +72 -13
- {fusion_bench-0.2.28.dist-info → fusion_bench-0.2.30.dist-info}/RECORD +49 -45
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +6 -19
- fusion_bench_config/llama_full_finetune.yaml +4 -16
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
- fusion_bench_config/nyuv2_config.yaml +4 -13
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
- fusion_bench_config/taskpool/clip-vit-base-patch32_robustness_corrupted.yaml +1 -1
- fusion_bench/utils/auto.py +0 -31
- {fusion_bench-0.2.28.dist-info → fusion_bench-0.2.30.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.28.dist-info → fusion_bench-0.2.30.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.28.dist-info → fusion_bench-0.2.30.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.28.dist-info → fusion_bench-0.2.30.dist-info}/top_level.txt +0 -0
fusion_bench/mixins/clip_classification.py
CHANGED

```diff
@@ -59,6 +59,15 @@ class CLIPClassificationMixin(LightningFabricMixin):
 
     @property
     def clip_processor(self):
+        """
+        Get the CLIP processor, loading it from the model pool if necessary.
+
+        Returns:
+            CLIPProcessor: The CLIP processor for image and text preprocessing.
+
+        Raises:
+            AssertionError: If the model pool is not set.
+        """
         if self._clip_processor is None:
             assert self.modelpool is not None, "Model pool is not set"
             self._clip_processor = self.modelpool.load_processor()
@@ -125,6 +134,11 @@ class CLIPClassificationMixin(LightningFabricMixin):
             clip_model (Optional[CLIPModel]): The CLIP model to use. If not provided, a pretrained model is loaded from the model pool.
             task_names (Optional[List[str]]): A list of task names to set up the classification head for. If not provided, all models in the model pool will be used.
         """
+        # make sure the task names are equal across all processes
+        _task_names = self.fabric.broadcast(task_names, src=0)
+        if not self.fabric.is_global_zero and task_names != _task_names:
+            raise ValueError("The `task_names` must be the same across all processes.")
+
         self.whether_setup_zero_shot_classification_head = True
         # load clip model if not provided
         if clip_model is None:
```
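The broadcast guard added above follows the usual rank-synchronization pattern: rank 0's value is taken as canonical and every other rank compares its local value against it, failing fast instead of silently diverging. A minimal standalone sketch of the pattern (the task list is illustrative; with a single process the guard is a no-op, but under a multi-process launch each rank runs this same code):

```python
import lightning as L

fabric = L.Fabric(accelerator="cpu", devices=1)
fabric.launch()

task_names = ["mnist", "cifar10"]  # this rank's local argument (illustrative)

canonical = fabric.broadcast(task_names, src=0)  # rank 0's object wins
if not fabric.is_global_zero and task_names != canonical:
    raise ValueError("The `task_names` must be the same across all processes.")
```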
```diff
@@ -147,7 +161,10 @@
         self.logit_scale_exp = self.fabric.to_device(self.logit_scale_exp)
 
         @cache_with_joblib()
-        def construct_classification_head(task: str):
+        def construct_classification_head(task: str, model_name: str):
+            log.info(
+                f"Constructing zero-shot classification head for task: {task} using model: {model_name}"
+            )
             nonlocal clip_classifier
 
             classnames, templates = get_classnames_and_templates(task)
@@ -163,7 +180,18 @@
         ):
             zeroshot_weights = None
             if self.fabric.is_global_zero:
-                zeroshot_weights = construct_classification_head(task)
+                if hasattr(clip_model, "config") and hasattr(
+                    clip_model.config, "_name_or_path"
+                ):
+                    model_name = clip_model.config._name_or_path
+                else:
+                    model_name = "unknown_model"
+                    log.warning(
+                        "CLIP model config does not have `_name_or_path` attribute. Using 'unknown_model' as model name."
+                    )
+                zeroshot_weights = construct_classification_head(
+                    task, model_name=model_name
+                )
 
             self.fabric.barrier()
             self.zeroshot_weights[task] = self.fabric.broadcast(zeroshot_weights, src=0)
```
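Passing `model_name` into the `@cache_with_joblib()`-decorated function is what keys the on-disk cache per backbone: joblib-style caches hash the function's arguments, so without the extra argument two different CLIP models would collide on the same cached classification head. A hedged sketch of the mechanism using plain `joblib.Memory` (`cache_with_joblib` is FusionBench's own wrapper; the cache location and return value here are illustrative):

```python
from joblib import Memory

memory = Memory("/tmp/fusion_bench_cache_demo", verbose=0)  # illustrative location

@memory.cache
def construct_classification_head(task: str, model_name: str):
    # The cached result is keyed on BOTH arguments, so heads computed for
    # one backbone are never returned for another.
    return f"zeroshot-weights({task}, {model_name})"

construct_classification_head("mnist", "clip-vit-base-patch32")   # computed
construct_classification_head("mnist", "clip-vit-base-patch32")   # cache hit
construct_classification_head("mnist", "clip-vit-large-patch14")  # recomputed
```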
fusion_bench/mixins/lightning_fabric.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import functools
 import logging
 import os
-from typing import TYPE_CHECKING, Any, List, Optional, TypeVar
+from typing import TYPE_CHECKING, Any, List, Mapping, Optional, TypeVar
 
 import lightning as L
 import torch
@@ -96,12 +96,24 @@ class LightningFabricMixin:
 
     @property
     def fabric(self):
+        """
+        Get the Lightning Fabric instance, initializing it if necessary.
+
+        Returns:
+            L.Fabric: The Lightning Fabric instance for distributed computing.
+        """
         if self._fabric_instance is None:
             self.setup_lightning_fabric(getattr(self, "config", DictConfig({})))
         return self._fabric_instance
 
     @fabric.setter
     def fabric(self, instance: L.Fabric):
+        """
+        Set the Lightning Fabric instance.
+
+        Args:
+            instance: The Lightning Fabric instance to use.
+        """
         self._fabric_instance = instance
 
     @property
@@ -172,6 +184,15 @@
     def tensorboard_summarywriter(
         self,
     ) -> "lightning.fabric.loggers.tensorboard.SummaryWriter":
+        """
+        Get the TensorBoard SummaryWriter for detailed logging.
+
+        Returns:
+            SummaryWriter: The TensorBoard SummaryWriter instance.
+
+        Raises:
+            AttributeError: If the logger is not a TensorBoardLogger.
+        """
         if isinstance(self.fabric.logger, TensorBoardLogger):
             return self.fabric.logger.experiment
         else:
@@ -179,6 +200,12 @@
 
     @property
     def is_debug_mode(self):
+        """
+        Check if the program is running in debug mode (fast_dev_run).
+
+        Returns:
+            bool: True if fast_dev_run is enabled, False otherwise.
+        """
         if hasattr(self, "config") and self.config.get("fast_dev_run", False):
             return True
         elif hasattr(self, "_program") and self._program.config.get(
@@ -190,13 +217,22 @@
 
     def log(self, name: str, value: Any, step: Optional[int] = None):
         """
-        Logs
+        Logs a single metric to the fabric's logger.
+
+        Args:
+            name: The name of the metric to log.
+            value: The value of the metric.
+            step: Optional step number for the metric.
         """
         self.fabric.log(name, value, step=step)
 
-    def log_dict(self, metrics:
+    def log_dict(self, metrics: Mapping[str, Any], step: Optional[int] = None):
         """
-        Logs
+        Logs multiple metrics to the fabric's logger.
+
+        Args:
+            metrics: Dictionary of metric names and values.
+            step: Optional step number for the metrics.
         """
         self.fabric.log_dict(metrics, step=step)
 
```
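With the completed signatures, `log` and `log_dict` are thin forwards to `Fabric.log` and `Fabric.log_dict`. A short hedged usage sketch (the subclass, hook name, and metric names are made up; the import path assumes the mixin is re-exported from `fusion_bench.mixins`):

```python
from fusion_bench.mixins import LightningFabricMixin  # assumed re-export path

class MyProgram(LightningFabricMixin):
    def on_step_end(self, step: int, loss: float, acc: float):
        # Log one metric at a time...
        self.log("train/loss", loss, step=step)
        # ...or a whole mapping in a single call.
        self.log_dict({"train/loss": loss, "train/acc": acc}, step=step)
```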
```diff
@@ -207,7 +243,12 @@
         name_template: str = "train/lr_group_{0}",
     ):
         """
-        Logs the learning rate of
+        Logs the learning rate of each parameter group in the optimizer.
+
+        Args:
+            optimizer: The optimizer whose learning rates should be logged.
+            step: Optional step number for the log entry.
+            name_template: Template string for the log name. Use {0} as placeholder for group index.
         """
         for i, param_group in enumerate(optimizer.param_groups):
             self.fabric.log(name_template.format(i), param_group["lr"], step=step)
```
fusion_bench/mixins/rich_live.py
CHANGED
````diff
@@ -2,20 +2,96 @@ from rich.live import Live
 
 
 class RichLiveMixin:
+    """
+    A mixin class that provides Rich Live display capabilities.
+
+    This mixin integrates Rich's Live display functionality, allowing for
+    dynamic, auto-refreshing console output. It's particularly useful for
+    displaying real-time updates, progress information, or continuously
+    changing data without cluttering the terminal.
+
+    Attributes:
+        _rich_live (Live): The internal Rich Live instance for live display updates.
+
+    Example:
+        ```python
+        class MyTask(RichLiveMixin):
+            def run(self):
+                self.start_rich_live()
+                for i in range(100):
+                    self.rich_live_print(f"Processing item {i}")
+                    time.sleep(0.1)
+                self.stop_rich_live()
+        ```
+    """
+
     _rich_live: Live = None
 
     @property
     def rich_live(self) -> Live:
+        """
+        Get the Rich Live instance, creating it if necessary.
+
+        Returns:
+            Live: The Rich Live instance for dynamic console output.
+        """
         if self._rich_live is None:
             self._rich_live = Live()
         return self._rich_live
 
     def start_rich_live(self):
+        """
+        Start the Rich Live display context.
+
+        This method enters the Rich Live context, enabling dynamic console output.
+        Must be paired with stop_rich_live() to properly clean up resources.
+
+        Returns:
+            The Rich Live instance in its started state.
+
+        Example:
+            ```python
+            self.start_rich_live()
+            # Display dynamic content
+            self.rich_live_print("Dynamic output")
+            self.stop_rich_live()
+            ```
+        """
         return self.rich_live.__enter__()
 
     def stop_rich_live(self):
+        """
+        Stop the Rich Live display context and clean up resources.
+
+        This method exits the Rich Live context and resets the internal Live instance.
+        Should be called after start_rich_live() when dynamic display is complete.
+
+        Example:
+            ```python
+            self.start_rich_live()
+            # ... display content ...
+            self.stop_rich_live()
+            ```
+        """
         self.rich_live.__exit__(None, None, None)
         self._rich_live = None
 
     def rich_live_print(self, msg):
+        """
+        Print a message to the Rich Live console.
+
+        This method displays the given message through the Rich Live console,
+        allowing for formatted, dynamic output that updates in place.
+
+        Args:
+            msg: The message to display. Can be a string or any Rich renderable object.
+
+        Example:
+            ```python
+            self.start_rich_live()
+            self.rich_live_print("[bold green]Success![/bold green]")
+            self.rich_live_print(Panel("Status: Running"))
+            self.stop_rich_live()
+            ```
+        """
         self.rich_live.console.print(msg)
````
fusion_bench/modelpool/base_pool.py
CHANGED

```diff
@@ -1,6 +1,6 @@
 import logging
 from copy import deepcopy
-from typing import Dict, Generator, List, Optional, Tuple, Union
+from typing import Any, Dict, Generator, List, Optional, Tuple, Union
 
 import torch
 from omegaconf import DictConfig, OmegaConf, UnsupportedValueType
@@ -8,7 +8,12 @@ from torch import nn
 from torch.utils.data import Dataset
 
 from fusion_bench.mixins import BaseYAMLSerializable, HydraConfigMixin
-from fusion_bench.utils import
+from fusion_bench.utils import (
+    ValidationError,
+    instantiate,
+    timeit_context,
+    validate_model_name,
+)
 
 __all__ = ["BaseModelPool"]
 
@@ -59,6 +64,16 @@ class BaseModelPool(
         except UnsupportedValueType:
             pass
 
+        if not models:
+            log.warning("Initialized BaseModelPool with empty models dictionary.")
+        else:
+            # Validate model names
+            for model_name in models.keys():
+                try:
+                    validate_model_name(model_name, allow_special=True)
+                except ValidationError as e:
+                    log.warning(f"Invalid model name '{model_name}': {e}")
+
         self._models = models
         self._train_datasets = train_datasets
         self._val_datasets = val_datasets
@@ -147,7 +162,9 @@
         """
         return model_name.startswith("_") and model_name.endswith("_")
 
-    def get_model_config(
+    def get_model_config(
+        self, model_name: str, return_copy: bool = True
+    ) -> Union[DictConfig, str, Any]:
         """
         Get the configuration for the specified model.
 
@@ -155,10 +172,36 @@
             model_name (str): The name of the model.
 
         Returns:
-            DictConfig: The configuration for the specified model.
+            Union[DictConfig, str, Any]: The configuration for the specified model, which may be a DictConfig, string path, or other type.
+
+        Raises:
+            ValidationError: If model_name is invalid.
+            KeyError: If model_name is not found in the pool.
         """
+        # Validate model name
+        validate_model_name(model_name, allow_special=True)
+
+        # raise friendly error if model not found in the pool
+        if model_name not in self._models:
+            available_models = list(self._models.keys())
+            raise KeyError(
+                f"Model '{model_name}' not found in model pool. "
+                f"Available models: {available_models}"
+            )
+
         model_config = self._models[model_name]
+        if isinstance(model_config, nn.Module):
+            log.warning(
+                f"Model configuration for '{model_name}' is a pre-instantiated model. "
+                "Returning the model instance instead of configuration."
+            )
+
         if return_copy:
+            if isinstance(model_config, nn.Module):
+                # raise performance warning
+                log.warning(
+                    f"Furthermore, returning a copy of the pre-instantiated model '{model_name}' may be inefficient."
+                )
             model_config = deepcopy(model_config)
         return model_config
 
@@ -171,12 +214,28 @@
 
         Returns:
             str: The path for the specified model.
+
+        Raises:
+            ValidationError: If model_name is invalid.
+            KeyError: If model_name is not found in the pool.
+            ValueError: If model configuration is not a string path.
         """
+        # Validate model name
+        validate_model_name(model_name, allow_special=True)
+
+        if model_name not in self._models:
+            available_models = list(self._models.keys())
+            raise KeyError(
+                f"Model '{model_name}' not found in model pool. "
+                f"Available models: {available_models}"
+            )
+
         if isinstance(self._models[model_name], str):
             return self._models[model_name]
         else:
             raise ValueError(
-                "Model
+                f"Model configuration for '{model_name}' is not a string path. "
+                "Try to override this method in derived modelpool class."
             )
 
     def load_model(
@@ -357,3 +416,25 @@
         """
        with timeit_context(f"Saving the state dict of model to {path}"):
            torch.save(model.state_dict(), path)
+
+    def __contains__(self, model_name: str) -> bool:
+        """
+        Check if a model with the given name exists in the model pool.
+
+        Examples:
+            >>> modelpool = BaseModelPool(models={"modelA": ..., "modelB": ...})
+            >>> "modelA" in modelpool
+            True
+            >>> "modelC" in modelpool
+            False
+
+        Args:
+            model_name (str): The name of the model to check.
+
+        Returns:
+            bool: True if the model exists, False otherwise.
+        """
+        if self._models is None:
+            raise RuntimeError("Model pool is not initialized")
+        validate_model_name(model_name, allow_special=True)
+        return model_name in self._models
```
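Together, `__contains__` and the stricter lookups give callers a cheap way to probe the pool before loading. A hedged usage sketch (the `models` keyword and the string config are illustrative; real pools are usually built from Hydra YAML):

```python
from fusion_bench.modelpool.base_pool import BaseModelPool

pool = BaseModelPool(models={"_pretrained_": "openai/clip-vit-base-patch32"})

if "_pretrained_" in pool:                      # uses the new __contains__
    print(pool.get_model_path("_pretrained_"))  # string configs are returned as paths

try:
    pool.get_model_config("missing-model")
except KeyError as err:
    print(err)  # message lists the available model names
```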
fusion_bench/models/masks/mask_model.py
CHANGED

```diff
@@ -113,21 +113,27 @@ class MaskModel(ParameterDictModel):
     def get_distribution(
         self,
         mask_type: Literal["discrete", "continuous"],
+        temperature: float = 0.5,
         **kwargs,
     ):
         return {
-            name: self._param_to_distribution(
+            name: self._param_to_distribution(
+                param, mask_type=mask_type, temperature=temperature, **kwargs
+            )
             for name, param in self.named_parameters()
         }
 
     def sample_mask(
         self,
         mask_type: Literal["discrete", "continuous"] = "discrete",
+        temperature: float = 0.5,
         **kwargs,
     ):
         mask = {}
         for name, param in self.named_parameters():
-            dist = self._param_to_distribution(
+            dist = self._param_to_distribution(
+                param, mask_type, temperature=temperature, **kwargs
+            )
             if mask_type == "discrete":
                 mask[name] = dist.sample()
             elif mask_type == "continuous":
```
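The new `temperature` parameter is the usual relaxation knob for differentiable masks: lower temperatures push samples toward hard 0/1 values, higher ones toward soft values near the underlying probability. A hedged illustration with `torch.distributions.RelaxedBernoulli` (assuming a relaxed Bernoulli is what `_param_to_distribution` builds in the continuous case; that helper is not shown in this diff):

```python
import torch
from torch.distributions import RelaxedBernoulli

logits = torch.zeros(5)  # p = 0.5 for each mask entry

sharp = RelaxedBernoulli(temperature=torch.tensor(0.1), logits=logits).rsample()
soft = RelaxedBernoulli(temperature=torch.tensor(2.0), logits=logits).rsample()

print(sharp)  # values close to 0 or 1
print(soft)   # values spread around 0.5
```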
fusion_bench/models/open_clip/modeling.py
CHANGED

```diff
@@ -1,3 +1,10 @@
+from fusion_bench.utils.packages import is_open_clip_available
+
+if not is_open_clip_available():
+    raise ImportError(
+        "open_clip is not installed. Please install it with `pip install open_clip_torch`."
+    )
+
 from typing import Callable, List
 
 import open_clip
```
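This guard converts an opaque `ModuleNotFoundError` raised deep inside `import open_clip` into an actionable message. The standard way to implement such a check without importing the package is `importlib.util.find_spec`; a hedged sketch of what `is_open_clip_available` plausibly does (the actual helper lives in `fusion_bench.utils.packages` and is not shown in this diff):

```python
import importlib.util

def is_open_clip_available() -> bool:
    # find_spec locates the package on sys.path without executing it,
    # so the check is cheap and side-effect free.
    return importlib.util.find_spec("open_clip") is not None
```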
fusion_bench/models/wrappers/layer_wise_fusion.py
CHANGED

````diff
@@ -173,6 +173,24 @@ class LayerWiseMergedModel(nn.Module, Generic[TorchModelType]):
 
     @property
     def forward_model(self):
+        """
+        Get a functional model with merged parameters.
+
+        Returns a partial function that applies the pretrained model with the current
+        merged state dictionary. This allows for efficient forward passes without
+        modifying the original model's parameters.
+
+        Returns:
+            Callable: A partial function that can be called with (args, kwargs) to
+                perform forward pass with merged parameters.
+
+        Example:
+            ```python
+            # Internal usage during forward pass
+            forward_fn = merged_model.forward_model
+            output = forward_fn(args=(x,), kwargs={})
+            ```
+        """
         return functools.partial(
             functional_call,
             self.pretrained_model,
@@ -181,10 +199,30 @@
             strict=self.strict,
         )
 
-    def merge_and_unload(
+    def merge_and_unload(
+        self,
+        task_vector_mask: Optional[Dict[str, Tensor]] = None,
+        copy: bool = False,
+    ) -> TorchModelType:
+        """
+        Merge models and return the final merged model.
+
+        Args:
+            task_vector_mask (Optional[Dict[str, Tensor]], optional): Optional masks
+                for selective parameter merging. Defaults to None.
+            copy (bool, optional): Whether to return a deep copy of the pretrained model.
+                Defaults to False. If True, the original pretrained model remains unchanged.
+
+        Returns:
+            TorchModelType: The pretrained model with merged parameters loaded.
+        """
         self.merge_weights(task_vector_mask=task_vector_mask)
-        self.pretrained_model.load_state_dict(self._merged_state_dict)
-        return self.pretrained_model
+        if copy:
+            model = deepcopy(self.pretrained_model)
+        else:
+            model = self.pretrained_model
+        model.load_state_dict(self._merged_state_dict)
+        return model
 
     def merge_weights(self, task_vector_mask: Optional[Dict[str, Tensor]] = None):
         """
````
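The `copy` flag controls whether merging overwrites the wrapped pretrained model. Its semantics reduce to the following standalone sketch, where a plain `nn.Linear` stands in for the wrapped model (with the wrapper itself the call is simply `merged = merged_model.merge_and_unload(copy=True)`):

```python
import torch
from copy import deepcopy
from torch import nn

pretrained = nn.Linear(4, 4)
merged_state = {k: torch.zeros_like(v) for k, v in pretrained.state_dict().items()}

# copy=True semantics: a deep copy receives the merged weights and the
# original module keeps its parameters.
standalone = deepcopy(pretrained)
standalone.load_state_dict(merged_state)

# copy=False semantics (the default): the wrapped module itself is
# overwritten, which is cheaper but irreversible.
pretrained.load_state_dict(merged_state)
```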
fusion_bench/models/wrappers/task_wise_fusion.py
CHANGED

```diff
@@ -16,6 +16,7 @@ outputs = merged_model(inputs)
 
 import functools
 import logging
+from copy import deepcopy
 from typing import Any, Callable, Dict, Generic, Iterator, List, Optional  # noqa: F401
 
 import torch
@@ -327,7 +328,11 @@ class TaskWiseMergedModel(nn.Module, Generic[TorchModelType]):
         self._merged_state_dict = state_dict
         return state_dict
 
-    def merge_and_unload(
+    def merge_and_unload(
+        self,
+        task_vector_mask: Optional[Dict[str, Tensor]] = None,
+        copy: bool = False,
+    ) -> TorchModelType:
         """
         Merge models and return the final merged model.
 
@@ -338,6 +343,8 @@ class TaskWiseMergedModel(nn.Module, Generic[TorchModelType]):
         Args:
             task_vector_mask (Optional[Dict[str, Tensor]], optional): Optional masks
                 for selective parameter merging. Defaults to None.
+            copy (bool, optional): Whether to return a deep copy of the pretrained model.
+                Defaults to False. If True, the original pretrained model remains unchanged.
 
         Returns:
             TorchModelType: The pretrained model with merged parameters loaded.
@@ -363,8 +370,12 @@ class TaskWiseMergedModel(nn.Module, Generic[TorchModelType]):
             The original pretrained model parameters will be lost.
         """
         self.merge_weights(task_vector_mask=task_vector_mask)
-        self.pretrained_model.load_state_dict(self._merged_state_dict)
-        return self.pretrained_model
+        if copy:
+            model = deepcopy(self.pretrained_model)
+        else:
+            model = self.pretrained_model
+        model.load_state_dict(self._merged_state_dict)
+        return model
 
     def forward(self, *args, **kwargs):
         """
```
fusion_bench/scripts/cli.py
CHANGED
```diff
@@ -69,6 +69,20 @@ def main(cfg: DictConfig) -> None:
     """
     OmegaConf.resolve(cfg)
     program: BaseHydraProgram = instantiate(cfg)
+
+    # Validate that instantiation succeeded and returned an object with 'run' method
+    if not hasattr(program, "run") or not callable(getattr(program, "run")):
+        err_msg = (
+            f"Expected an object with a callable 'run' method, but got {type(program).__name__}. "
+            "Ensure that the configuration specifies a concrete program class with '_target_'."
+        )
+        if "_target_" not in cfg:
+            err_msg += "\nThe '_target_' field is missing from the root configuration."
+        else:
+            err_msg += f"\nFound '_target_': {cfg._target_}"
+        err_msg += f"\n\nConfiguration content:\n{cfg}"
+        raise TypeError(err_msg)
+
     program.run()
 
 
```
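The check guards against a root config that instantiates into something other than a runnable program, e.g. when `_target_` is missing or points at an unrelated class. A minimal reproduction of the guard using `hydra.utils.instantiate` (FusionBench uses its own `instantiate` wrapper; `collections.Counter` is just a stand-in target that hydra can build but that has no `run()` method, so the check fires):

```python
from omegaconf import OmegaConf
from hydra.utils import instantiate

# A root config whose _target_ names a class without a run() method.
cfg = OmegaConf.create({"_target_": "collections.Counter"})

program = instantiate(cfg)
if not hasattr(program, "run") or not callable(getattr(program, "run", None)):
    raise TypeError(
        f"Expected an object with a callable 'run' method, but got {type(program).__name__}."
    )
```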