PyPI - fusion-bench - Versions diffs - 0.2.1__tar.gz → 0.2.2__tar.gz - Mend

fusion-bench 0.2.1tar.gz → 0.2.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (470) hide show

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: fusion_bench
-Version: 0.2.1
+Version: 0.2.2
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -47,7 +47,7 @@ Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
-# FusionBench: A Comprehensive Benchmark of Deep Model Fusion
+# FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
 [![arXiv](https://img.shields.io/badge/arXiv-1234.56789-b31b1b.svg)](http://arxiv.org/abs/2406.03280)
 [![GitHub License](https://img.shields.io/github/license/tanganke/fusion_bench)](https://github.com/tanganke/fusion_bench/blob/main/LICENSE)
@@ -57,8 +57,6 @@ Requires-Dist: pytest
 [![Static Badge](https://img.shields.io/badge/code%20style-black-black)](https://github.com/psf/black)
 [![Static Badge](https://img.shields.io/badge/code%20style-yamlfmt-black)](https://github.com/google/yamlfmt)
-> [!WARNING]
-> This project is still in testing phase as the API may be subject to change. Please report any issues you encounter.
 > [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
@@ -70,6 +68,12 @@ FusionBench is a benchmark suite designed to evaluate the performance of various
 Projects based on FusionBench:
+<details>
+  <summary>Jinluan Yang et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. Oct, 2024. http://arxiv.org/abs/2410.13910</summary>
+  <img width="1018" alt="image" src="https://github.com/user-attachments/assets/679aaa7e-0506-4e09-a12a-345c12cf529f">
+</details>
 <details>
   <summary>Anke Tang et al. SMILE: Zero-Shot Sparse Mixture of Low-Rank Experts Construction From Pre-Trained Foundation Models. Aug, 2024. http://arxiv.org/abs/2408.10174</summary>
@@ -123,8 +127,7 @@ Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_
 ## Implement your own model fusion algorithm
 ```python
-from fusion_bench.method import BaseModelFusionAlgorithm
-from fusion_bench.modelpool import BaseModelPool
+from fusion_bench import BaseModelFusionAlgorithm, BaseModelPool
 class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
     """
@@ -132,6 +135,8 @@ class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
     """
     # _config_mapping maps the attribute to the corresponding key in the configuration file.
+    # this is optional and can be used to serialize the object to a configuration file.
+    # `self.config.hyperparam_1` will be mapped to the attribute `hyperparam_attr_1`.
     _config_mapping = BaseModelFusionAlgorithm._config_mapping | {
         "hyperparam_attr_1": "hyperparam_1",
         "hyperparam_attr_2": "hyperparam_2",

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/README.md RENAMED Viewed

@@ -1,4 +1,4 @@
-# FusionBench: A Comprehensive Benchmark of Deep Model Fusion
+# FusionBench: A Comprehensive Benchmark/ToolKit of Deep Model Fusion
 [![arXiv](https://img.shields.io/badge/arXiv-1234.56789-b31b1b.svg)](http://arxiv.org/abs/2406.03280)
 [![GitHub License](https://img.shields.io/github/license/tanganke/fusion_bench)](https://github.com/tanganke/fusion_bench/blob/main/LICENSE)
@@ -8,8 +8,6 @@
 [![Static Badge](https://img.shields.io/badge/code%20style-black-black)](https://github.com/psf/black)
 [![Static Badge](https://img.shields.io/badge/code%20style-yamlfmt-black)](https://github.com/google/yamlfmt)
-> [!WARNING]
-> This project is still in testing phase as the API may be subject to change. Please report any issues you encounter.
 > [!TIP]
 > Documentation is available at [tanganke.github.io/fusion_bench/](https://tanganke.github.io/fusion_bench/).
@@ -21,6 +19,12 @@ FusionBench is a benchmark suite designed to evaluate the performance of various
 Projects based on FusionBench:
+<details>
+  <summary>Jinluan Yang et al. Mitigating the Backdoor Effect for Multi-Task Model Merging via Safety-Aware Subspace. Oct, 2024. http://arxiv.org/abs/2410.13910</summary>
+  <img width="1018" alt="image" src="https://github.com/user-attachments/assets/679aaa7e-0506-4e09-a12a-345c12cf529f">
+</details>
 <details>
   <summary>Anke Tang et al. SMILE: Zero-Shot Sparse Mixture of Low-Rank Experts Construction From Pre-Trained Foundation Models. Aug, 2024. http://arxiv.org/abs/2408.10174</summary>
@@ -74,8 +78,7 @@ Read the [CLI documentation](https://tanganke.github.io/fusion_bench/cli/fusion_
 ## Implement your own model fusion algorithm
 ```python
-from fusion_bench.method import BaseModelFusionAlgorithm
-from fusion_bench.modelpool import BaseModelPool
+from fusion_bench import BaseModelFusionAlgorithm, BaseModelPool
 class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
     """
@@ -83,6 +86,8 @@ class DerivedModelFusionAlgorithm(BaseModelFusionAlgorithm):
     """
     # _config_mapping maps the attribute to the corresponding key in the configuration file.
+    # this is optional and can be used to serialize the object to a configuration file.
+    # `self.config.hyperparam_1` will be mapped to the attribute `hyperparam_attr_1`.
     _config_mapping = BaseModelFusionAlgorithm._config_mapping | {
         "hyperparam_attr_1": "hyperparam_1",
         "hyperparam_attr_2": "hyperparam_2",

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/fusion_bench/compat/method/__init__.py RENAMED Viewed

@@ -4,6 +4,13 @@ from .base_algorithm import ModelFusionAlgorithm
 class AlgorithmFactory:
+    """
+    Factory class to create and manage different model fusion algorithms.
+    This class provides methods to create algorithms based on a given configuration,
+    register new algorithms, and list available algorithms.
+    """
     _aglorithms = {
         # single task learning (fine-tuning)
         "clip_finetune": ".classification.clip_finetune.ImageClassificationFineTuningForCLIP",
@@ -32,6 +39,7 @@ class AlgorithmFactory:
         "clip_weight_ensembling_moe": ".we_moe.clip_we_moe.CLIPWeightEnsemblingMoEAlgorithm",
         "model_recombination": ".model_recombination.ModelRecombinationAlgorithm",
         "smile_upscaling": ".smile_upscaling.smile_upscaling.SmileUpscalingAlgorithm",
+        "sparse_clip_weight_ensembling_moe": "fusion_bench.method.SparseCLIPWeightEnsemblingMoEAlgorithm",
         "smile_mistral_upscaling": ".smile_upscaling.smile_mistral_upscaling.SmileMistralUpscalingAlgorithm",
         # pruning methods
         "magnitude_diff_pruning": ".pruning.MagnitudeDiffPruningAlgorithm",
@@ -41,6 +49,18 @@ class AlgorithmFactory:
     @staticmethod
     def create_algorithm(method_config: DictConfig) -> ModelFusionAlgorithm:
+        """
+        Create an instance of a model fusion algorithm based on the provided configuration.
+        Args:
+            method_config (DictConfig): The configuration for the algorithm. Must contain a 'name' attribute that specifies the type of the algorithm.
+        Returns:
+            ModelFusionAlgorithm: An instance of the specified algorithm.
+        Raises:
+            ValueError: If 'name' attribute is not found in the configuration or does not match any known algorithm names.
+        """
         from fusion_bench.utils import import_object
         algorithm_name = method_config.name
@@ -58,10 +78,23 @@ class AlgorithmFactory:
     @staticmethod
     def register_algorithm(name: str, algorithm_cls):
+        """
+        Register a new algorithm with the factory.
+        Args:
+            name (str): The name of the algorithm.
+            algorithm_cls: The class of the algorithm to register.
+        """
         AlgorithmFactory._aglorithms[name] = algorithm_cls
     @classmethod
     def available_algorithms(cls):
+        """
+        Get a list of available algorithms.
+        Returns:
+            list: A list of available algorithm names.
+        """
         return list(cls._aglorithms.keys())

fusion_bench-0.2.2/fusion_bench/compat/method/base_algorithm.py ADDED Viewed

@@ -0,0 +1,50 @@
+from abc import ABC, abstractmethod
+from typing import Optional
+from omegaconf import DictConfig
+__all__ = ["ModelFusionAlgorithm"]
+class ModelFusionAlgorithm(ABC):
+    """
+    Abstract base class for model fusion algorithms (for v0.1.x versions, deprecated).
+    For implementing new method, use `fusion_bench.method.BaseModelFusionAlgorithm` instead.
+    This class provides a template for implementing model fusion algorithms.
+    Subclasses must implement the `run` method to define the fusion logic.
+    Attributes:
+        config (DictConfig): Configuration for the algorithm.
+    """
+    def __init__(self, algorithm_config: Optional[DictConfig] = None):
+        """
+        Initialize the model fusion algorithm with the given configuration.
+        Args:
+            algorithm_config (Optional[DictConfig]): Configuration for the algorithm. Defaults to an empty configuration if not provided.
+                Get access to the configuration using `self.config`.
+        """
+        super().__init__()
+        if algorithm_config is None:
+            algorithm_config = DictConfig({})
+        self.config = algorithm_config
+    @abstractmethod
+    def run(self, modelpool):
+        """
+        Fuse the models in the given model pool.
+        This method must be implemented by subclasses to define the fusion logic.
+        `modelpool` is an object responsible for managing the models to be fused and optional datasets to be used for fusion.
+        Args:
+            modelpool: The pool of models to fuse.
+        Examples:
+            >>> algorithm = SimpleAverageAlgorithm()
+            >>> modelpool = ModelPool()
+            >>> merged_model = algorithm.run(modelpool)
+        """
+        pass

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/fusion_bench/compat/modelpool/__init__.py RENAMED Viewed

@@ -1,3 +1,4 @@
+# flake8: noqa F401
 from omegaconf import DictConfig
 from fusion_bench.modelpool.huggingface_gpt2_classification import (
@@ -11,6 +12,13 @@ from .huggingface_clip_vision import HuggingFaceClipVisionPool
 class ModelPoolFactory:
+    """
+    Factory class to create and manage different model pools.
+    This class provides methods to create model pools based on a given configuration,
+    register new model pools, and list available model pools.
+    """
     _modelpool = {
         "NYUv2ModelPool": ".nyuv2_modelpool.NYUv2ModelPool",
         "huggingface_clip_vision": HuggingFaceClipVisionPool,
@@ -27,6 +35,21 @@ class ModelPoolFactory:
     @staticmethod
     def create_modelpool(modelpool_config: DictConfig) -> ModelPool:
+        """
+        Create an instance of a model pool based on the provided configuration.
+        This is for v0.1.x versions, deprecated.
+        For implementing new model pool, use `fusion_bench.modelpool.BaseModelPool` instead.
+        Args:
+            modelpool_config (DictConfig): The configuration for the model pool.
+            Must contain a 'type' attribute that specifies the type of the model pool.
+        Returns:
+            ModelPool: An instance of the specified model pool.
+        Raises:
+            ValueError: If 'type' attribute is not found in the configuration or does not match any known model pool types.
+        """
         from fusion_bench.utils import import_object
         modelpool_type = modelpool_config.get("type")
@@ -46,10 +69,23 @@ class ModelPoolFactory:
     @staticmethod
     def register_modelpool(name: str, modelpool_cls):
+        """
+        Register a new model pool with the factory.
+        Args:
+            name (str): The name of the model pool.
+            modelpool_cls: The class of the model pool to register.
+        """
         ModelPoolFactory._modelpool[name] = modelpool_cls
     @classmethod
     def available_modelpools(cls):
+        """
+        Get a list of available model pools.
+        Returns:
+            list: A list of available model pool names.
+        """
         return list(cls._modelpool.keys())

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/fusion_bench/compat/modelpool/base_pool.py RENAMED Viewed

@@ -18,11 +18,19 @@ log = logging.getLogger(__name__)
 class ModelPool(ABC):
     """
     This is the base class for all modelpools.
+    For version v0.1.x, deprecated.
+    Please implement new algorithms using `fusion_bench.modelpool.BaseModelPool`.
     """
     _model_names = None
     def __init__(self, modelpool_config: Optional[DictConfig] = None):
+        """
+        Initialize the ModelPool with the given configuration.
+        Args:
+            modelpool_config (Optional[DictConfig]): The configuration for the model pool.
+        """
         super().__init__()
         self.config = modelpool_config
@@ -35,6 +43,12 @@ class ModelPool(ABC):
             self._model_names = model_names
     def __len__(self):
+        """
+        Return the number of models in the model pool, excluding special models such as `_pretrained_`.
+        Returns:
+            int: The number of models in the model pool.
+        """
         return len(self.model_names)
     @property
@@ -55,6 +69,9 @@ class ModelPool(ABC):
     def has_pretrained(self):
         """
         Check if the pretrained model is available in the model pool.
+        Returns:
+            bool: True if the pretrained model is available, False otherwise.
         """
         for model_config in self.config["models"]:
             if model_config.get("name", None) == "_pretrained_":
@@ -121,22 +138,46 @@ class ModelPool(ABC):
             torch.save(model.state_dict(), path)
     def models(self):
+        """
+        Generator that yields models from the model pool.
+        Yields:
+            nn.Module: The next model in the model pool.
+        """
         for model_name in self.model_names:
             yield self.load_model(model_name)
     def named_models(self):
+        """
+        Generator that yields model names and models from the model pool.
+        Yields:
+            tuple: A tuple containing the model name and the model.
+        """
         for model_name in self.model_names:
             yield model_name, self.load_model(model_name)
     def get_train_dataset(self, model_name: str):
         """
         Get the training dataset for the model.
+        Args:
+            model_name (str): The name of the model for which to get the training dataset.
+        Returns:
+            Any: The training dataset for the model.
         """
         raise NotImplementedError
     def get_test_dataset(self, model_name: str):
         """
         Get the testing dataset for the model.
+        Args:
+            model_name (str): The name of the model for which to get the testing dataset.
+        Returns:
+            Any: The testing dataset for the model.
         """
         raise NotImplementedError
@@ -144,18 +185,27 @@ class ModelPool(ABC):
         """
         Setup the taskpool before evaluation.
         Such as setting the fabric, processor, tokenizer, etc.
+        Args:
+            taskpool (Any): The taskpool to setup.
         """
         pass
     def to_modellist(self) -> List[nn.Module]:
         """
         Convert the model pool to a list of models.
+        Returns:
+            list: A list of models.
         """
         return [self.load_model(m) for m in self.model_names]
     def to_modeldict(self) -> Dict[str, nn.Module]:
         """
         Convert the model pool to a dictionary of models.
+        Returns:
+            dict: A dictionary of models.
         """
         return {m: self.load_model(m) for m in self.model_names}
@@ -170,6 +220,13 @@ class ListModelPool(ModelPool):
         models: List[nn.Module],
         has_pretraned: bool = False,
     ):
+        """
+        Initialize the ListModelPool with the given list of models.
+        Args:
+            models (List[nn.Module]): The list of models.
+            has_pretraned (bool): Whether the first model in the list is pretrained.
+        """
         modelpool_config = {}
         modelpool_config["models"] = []
         model_dict = {}
@@ -188,6 +245,16 @@ class ListModelPool(ModelPool):
         super().__init__(DictConfig(modelpool_config))
     def load_model(self, model_config: str | DictConfig, copy=True) -> nn.Module:
+        """
+        Load the model from the model pool.
+        Args:
+            model_config (str | DictConfig): The model name or the configuration dictionary for the model to load.
+            copy (bool): Whether to return a copy of the model, defaults to `True`.
+        Returns:
+            nn.Module: The loaded model.
+        """
         if isinstance(model_config, str):
             model_config = self.get_model_config(model_config)
         model_name = model_config["name"]
@@ -203,6 +270,12 @@ class DictModelPool(ModelPool):
     """
     def __init__(self, model_dict: Dict[str, nn.Module]):
+        """
+        Initialize the DictModelPool with the given dictionary of models.
+        Args:
+            model_dict (Dict[str, nn.Module]): The dictionary of models.
+        """
         modelpool_config = {}
         modelpool_config["models"] = []
         for model_name, model in model_dict.items():
@@ -211,6 +284,16 @@ class DictModelPool(ModelPool):
         super().__init__(DictConfig(modelpool_config))
     def load_model(self, model_config: str | DictConfig, copy=True) -> nn.Module:
+        """
+        Load the model from the model pool.
+        Args:
+            model_config (str | DictConfig): The configuration dictionary for the model to load.
+            copy (bool): Whether to return a copy of the model.
+        Returns:
+            nn.Module: The loaded model.
+        """
         if isinstance(model_config, str):
             model_config = self.get_model_config(model_config)
         model_name = model_config["name"]
@@ -221,6 +304,18 @@ class DictModelPool(ModelPool):
 def to_modelpool(obj: List[nn.Module], **kwargs):
+    """
+    Convert the given object to a model pool.
+    Args:
+        obj (List[nn.Module]): The object to convert to a model pool.
+    Returns:
+        ModelPool: The converted model pool.
+    Raises:
+        ValueError: If the object cannot be converted to a model pool.
+    """
     if isinstance(obj, (ModelPool, BaseModelPool)):
         return obj
     elif isinstance(obj, (list, tuple)) and all(isinstance(m, nn.Module) for m in obj):

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/fusion_bench/compat/modelpool/huggingface_clip_vision.py RENAMED Viewed

@@ -29,6 +29,9 @@ class HuggingFaceClipVisionPool(ModelPool):
     @property
     def clip_processor(self):
+        """
+        Returns the CLIP processor. If it's not already initialized, it initializes it using the path of the pretrained model.
+        """
         if self._clip_processor is None:
             if "_pretrained_" in self._model_names:
                 self._clip_processor = CLIPProcessor.from_pretrained(
@@ -76,12 +79,33 @@ class HuggingFaceClipVisionPool(ModelPool):
             model.save_pretrained(path)
     def get_tta_dataset_config(self, dataset: str):
+        """
+        Retrieve the configuration for a TTA (Test-Time Adaptation) dataset.
+        Args:
+            dataset (str): The name of the dataset for which to retrieve the configuration.
+        Returns:
+            DictConfig: The configuration dictionary for the specified dataset.
+        Raises:
+            ValueError: If the specified dataset is not found in the configuration.
+        """
         for dataset_config in self.config.tta_datasets:
             if dataset_config.name == dataset:
                 return dataset_config
         raise ValueError(f"Dataset {dataset} not found in config")
     def prepare_dataset_config(self, dataset_config: DictConfig):
+        """
+        Prepare the dataset configuration by setting the dataset type if it's not already set.
+        Args:
+            dataset_config (DictConfig): The configuration dictionary for the dataset.
+        Returns:
+            DictConfig: The updated configuration dictionary for the dataset.
+        """
         if not hasattr(dataset_config, "type"):
             with open_dict(dataset_config):
                 dataset_config["type"] = self.config.dataset_type
@@ -94,6 +118,13 @@ class HuggingFaceClipVisionPool(ModelPool):
         """
         Load the test dataset for the task.
         This method is cached, so the dataset is loaded only once.
+        Args:
+            tta_dataset (str): The name of the TTA dataset to load.
+            clip_processor (Optional[CLIPProcessor]): The CLIP processor to use for preprocessing the dataset. If None, the default processor is used.
+        Returns:
+            CLIPDataset: The loaded and preprocessed TTA test dataset.
         """
         if clip_processor is None:
             # if clip_processor is not provided, try to load the clip_processor from pre-trained model
@@ -106,6 +137,18 @@ class HuggingFaceClipVisionPool(ModelPool):
         return dataset
     def get_train_dataset_config(self, model_name: str):
+        """
+        Retrieve the configuration for a specific training dataset.
+        Args:
+            model_name (str): The name of the model for which to retrieve the training dataset configuration.
+        Returns:
+            DictConfig: The configuration dictionary for the specified training dataset.
+        Raises:
+            ValueError: If the specified training dataset is not found in the configuration.
+        """
         for dataset_config in self.config.train_datasets:
             if dataset_config.name == model_name:
                 return dataset_config
@@ -114,6 +157,16 @@ class HuggingFaceClipVisionPool(ModelPool):
     def get_train_dataset(
         self, model_name: str, clip_processor: Optional[CLIPProcessor] = None
     ):
+        """
+        Load the training dataset for the specified model.
+        Args:
+            model_name (str): The name of the model for which to load the training dataset.
+            clip_processor (Optional[CLIPProcessor]): The CLIP processor to use for preprocessing the dataset. If None, the default processor is used.
+        Returns:
+            CLIPDataset: The loaded and preprocessed training dataset.
+        """
         if clip_processor is None:
             # if clip_processor is not provided, try to load the clip_processor from pre-trained model
             clip_processor = self.clip_processor

{fusion_bench-0.2.1 → fusion_bench-0.2.2}/fusion_bench/compat/taskpool/__init__.py RENAMED Viewed

@@ -1,3 +1,4 @@
+# flake8: noqa F401
 from omegaconf import DictConfig
 from fusion_bench.taskpool.dummy import DummyTaskPool
@@ -6,6 +7,15 @@ from .base_pool import TaskPool
 class TaskPoolFactory:
+    """
+    Factory class to create and manage different task pools.
+    This is for v0.1.x versions, deprecated.
+    For implementing new task pool, use `fusion_bench.taskpool.BaseTaskPool` instead.
+    This class provides methods to create task pools based on a given configuration,
+    register new task pools, and list available task pools.
+    """
     _taskpool_types = {
         "dummy": DummyTaskPool,
         "clip_vit_classification": ".clip_image_classification.CLIPImageClassificationTaskPool",
@@ -15,6 +25,18 @@ class TaskPoolFactory:
     @staticmethod
     def create_taskpool(taskpool_config: DictConfig):
+        """
+        Create an instance of a task pool based on the provided configuration.
+        Args:
+            taskpool_config (DictConfig): The configuration for the task pool. Must contain a 'type' attribute that specifies the type of the task pool.
+        Returns:
+            TaskPool: An instance of the specified task pool.
+        Raises:
+            ValueError: If 'type' attribute is not found in the configuration or does not match any known task pool types.
+        """
         from fusion_bench.utils import import_object
         taskpool_type = taskpool_config.get("type")
@@ -34,10 +56,23 @@ class TaskPoolFactory:
     @staticmethod
     def register_taskpool(name: str, taskpool_cls):
+        """
+        Register a new task pool with the factory.
+        Args:
+            name (str): The name of the task pool.
+            taskpool_cls: The class of the task pool to register.
+        """
         TaskPoolFactory._taskpool_types[name] = taskpool_cls
     @classmethod
     def available_taskpools(cls):
+        """
+        Get a list of available task pools.
+        Returns:
+            list: A list of available task pool names.
+        """
         return list(cls._taskpool_types.keys())

fusion-bench 0.2.1__tar.gz → 0.2.2__tar.gz

fusion-bench 0.2.1tar.gz → 0.2.2tar.gz