fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__init__.py +1 -0
- fusion_bench/_get_started/__init__.py +3 -0
- fusion_bench/_get_started/greeting_program.py +49 -0
- fusion_bench/compat/method/base_algorithm.py +14 -0
- fusion_bench/constants/__init__.py +5 -0
- fusion_bench/constants/clip_vision.py +26 -2
- fusion_bench/constants/paths.py +4 -0
- fusion_bench/dataset/clip_dataset.py +2 -1
- fusion_bench/dataset/gpt2_glue.py +9 -9
- fusion_bench/dataset/image_corruption/__init__.py +0 -0
- fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
- fusion_bench/dataset/image_dataset.py +1 -1
- fusion_bench/dataset/nyuv2.py +2 -2
- fusion_bench/method/__init__.py +16 -1
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
- fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
- fusion_bench/method/base_algorithm.py +195 -12
- fusion_bench/method/bitdelta/__init__.py +4 -0
- fusion_bench/method/bitdelta/bitdelta.py +156 -0
- fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
- fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
- fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
- fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
- fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
- fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
- fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
- fusion_bench/method/doge_ta/doge_ta.py +1 -1
- fusion_bench/method/ensemble.py +12 -12
- fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
- fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -2
- fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
- fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
- fusion_bench/method/fw_merging/fw_hard.py +1 -1
- fusion_bench/method/fw_merging/fw_soft.py +1 -1
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
- fusion_bench/method/linear/expo.py +2 -1
- fusion_bench/method/linear/linear_interpolation.py +6 -4
- fusion_bench/method/linear/simple_average_for_llama.py +16 -6
- fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
- fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
- fusion_bench/method/model_recombination.py +2 -5
- fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
- fusion_bench/method/moe_pruner/utils/data.py +2 -1
- fusion_bench/method/moe_pruner/utils/prune.py +6 -1
- fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
- fusion_bench/method/pruning/wanda_utils/data.py +1 -2
- fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
- fusion_bench/method/randes/modelsoup.py +1 -3
- fusion_bench/method/regmean/clip_regmean.py +2 -2
- fusion_bench/method/regmean/gpt2_regmean.py +3 -10
- fusion_bench/method/regmean/regmean.py +2 -11
- fusion_bench/method/regmean_plusplus/__init__.py +3 -0
- fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +199 -0
- fusion_bench/method/regmean_plusplus/regmean_plusplus.py +383 -0
- fusion_bench/method/simple_average.py +16 -4
- fusion_bench/method/slerp/slerp.py +5 -2
- fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
- fusion_bench/method/smile_upscaling/projected_energy.py +145 -0
- fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +39 -28
- fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
- fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
- fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
- fusion_bench/method/ties_merging/ties_merging.py +13 -26
- fusion_bench/method/we_moe/clip_we_moe.py +5 -4
- fusion_bench/method/we_moe/we_moe.py +6 -6
- fusion_bench/method/weighted_average/llama.py +4 -16
- fusion_bench/metrics/continual_learning/__init__.py +1 -0
- fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
- fusion_bench/metrics/nyuv2/__init__.py +2 -2
- fusion_bench/metrics/nyuv2/segmentation.py +1 -1
- fusion_bench/mixins/__init__.py +10 -2
- fusion_bench/mixins/clip_classification.py +4 -3
- fusion_bench/mixins/hydra_config.py +105 -7
- fusion_bench/mixins/lightning_fabric.py +2 -0
- fusion_bench/mixins/serialization.py +265 -48
- fusion_bench/modelpool/__init__.py +2 -2
- fusion_bench/modelpool/base_pool.py +29 -9
- fusion_bench/modelpool/causal_lm/causal_lm.py +9 -0
- fusion_bench/modelpool/clip_vision/modelpool.py +43 -12
- fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
- fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
- fusion_bench/models/__init__.py +2 -1
- fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
- fusion_bench/models/hf_utils.py +182 -0
- fusion_bench/models/linearized/linearized_model_utils.py +4 -4
- fusion_bench/models/linearized/vision_model.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
- fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
- fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
- fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
- fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
- fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
- fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
- fusion_bench/models/modeling_smile_llama/__init__.py +0 -0
- fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
- fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +705 -0
- fusion_bench/models/modeling_smile_llama/register.py +8 -0
- fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
- fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
- fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +6 -7
- fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
- fusion_bench/models/parameter_dict.py +1 -1
- fusion_bench/models/sparse_we_moe.py +1 -53
- fusion_bench/models/utils.py +26 -0
- fusion_bench/models/we_moe.py +1 -53
- fusion_bench/models/wrappers/ensemble.py +6 -4
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
- fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
- fusion_bench/programs/base_program.py +81 -2
- fusion_bench/programs/fabric_fusion_program.py +24 -8
- fusion_bench/scripts/cli.py +6 -6
- fusion_bench/taskpool/base_pool.py +4 -3
- fusion_bench/taskpool/clip_vision/taskpool.py +34 -18
- fusion_bench/taskpool/dummy.py +1 -1
- fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
- fusion_bench/tasks/clip_classification/__init__.py +6 -4
- fusion_bench/utils/__init__.py +6 -1
- fusion_bench/utils/devices.py +14 -4
- fusion_bench/utils/instantiate_utils.py +3 -1
- fusion_bench/utils/misc.py +48 -2
- fusion_bench/utils/modelscope.py +265 -0
- fusion_bench/utils/parameters.py +2 -2
- fusion_bench/utils/rich_utils.py +3 -0
- fusion_bench/utils/state_dict_arithmetic.py +34 -27
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/METADATA +31 -24
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/RECORD +189 -153
- fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
- fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
- fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
- fusion_bench_config/_get_started/greeting_program.yaml +4 -0
- fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
- fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
- fusion_bench_config/fabric_model_fusion.yaml +45 -17
- fusion_bench_config/hydra/default.yaml +6 -2
- fusion_bench_config/llama_full_finetune.yaml +1 -0
- fusion_bench_config/method/adamerging/clip.yaml +1 -1
- fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
- fusion_bench_config/method/depth_upscaling.yaml +4 -1
- fusion_bench_config/method/regmean/clip_regmean.yaml +1 -1
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +11 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +34 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +14 -17
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +14 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +39 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +49 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +55 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +21 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +61 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +67 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +73 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +26 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +7 -5
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +6 -10
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +6 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +7 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +8 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +4 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +32 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +14 -6
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +73 -8
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +27 -7
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +6 -10
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +2 -2
- fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
- fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
- fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
- fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
- fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
- fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
- fusion_bench_config/nyuv2_config.yaml +3 -1
- fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
- fusion_bench_config/path/default.yaml +28 -0
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
- fusion_bench_config/method/adamerging.yaml +0 -23
- fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
- fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
- fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/top_level.txt +0 -0
- /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
fusion_bench/method/ties_merging/ties_merging.py
CHANGED

@@ -9,14 +9,14 @@ Overview of Ties-Merging:
 """
 
 import logging
 
-from typing import Dict, List, Literal, Mapping, Union  # noqa: F401
+from typing import Any, Dict, List, Literal, Mapping, Union  # noqa: F401
 
 import torch
 from torch import Tensor, nn
 
 from fusion_bench.compat.modelpool import to_modelpool
 from fusion_bench.method import BaseAlgorithm
-from fusion_bench.mixins import SimpleProfilerMixin
+from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
 from fusion_bench.modelpool import BaseModelPool
 from fusion_bench.utils.type import StateDictType
 
@@ -25,33 +25,22 @@ from .ties_merging_utils import state_dict_to_vector, ties_merging, vector_to_st
 log = logging.getLogger(__name__)
 
 
-class TiesMergingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
-    """
-    TiesMergingAlgorithm is a class for fusing multiple models using the TIES merging technique.
-
-    Attributes:
-        scaling_factor (float): The scaling factor to apply to the merged task vector.
-        threshold (float): The threshold for resetting values in the task vector.
-        remove_keys (List[str]): List of keys to remove from the state dictionary.
-        merge_func (Literal["sum", "mean", "max"]): The merge function to use for disjoint merging.
-    """
-
-    _config_mapping = BaseAlgorithm._config_mapping | {
-        "scaling_factor": "scaling_factor",
-        "threshold": "threshold",
-        "remove_keys": "remove_keys",
-        "merge_func": "merge_func",
-    }
-
+@auto_register_config
+class TiesMergingAlgorithm(
+    SimpleProfilerMixin,
+    BaseAlgorithm,
+):
     def __init__(
         self,
         scaling_factor: float,
         threshold: float,
         remove_keys: List[str],
         merge_func: Literal["sum", "mean", "max"],
-        **kwargs,
+        **kwargs: Any,
     ):
         """
+        TiesMergingAlgorithm is a class for fusing multiple models using the TIES merging technique.
+
         Initialize the TiesMergingAlgorithm with the given parameters.
 
         Args:
@@ -61,14 +50,12 @@ class TiesMergingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
             merge_func (Literal["sum", "mean", "max"]): The merge function to use for disjoint merging.
             **kwargs: Additional keyword arguments for the base class.
         """
-        self.scaling_factor = scaling_factor
-        self.threshold = threshold
-        self.remove_keys = remove_keys
-        self.merge_func = merge_func
         super().__init__(**kwargs)
 
     @torch.no_grad()
-    def run(self, modelpool: BaseModelPool | Dict[str, nn.Module], **kwargs):
+    def run(
+        self, modelpool: BaseModelPool | Dict[str, nn.Module], **kwargs: Any
+    ) -> nn.Module:
         """
         Run the TIES merging algorithm to fuse models in the model pool.
 
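This pattern recurs throughout the release: hand-maintained `_config_mapping` dicts and repeated `self.x = x` assignments are replaced by the `@auto_register_config` decorator now exported from `fusion_bench.mixins` (its implementation lives in the updated `fusion_bench/mixins/serialization.py`, which is not shown in this diff). As a rough mental model only, a decorator with the behavior implied by the removed code might look like this hypothetical sketch:

```python
import inspect


def auto_register_config(cls):
    """Hypothetical sketch, not the library's code: record every named
    __init__ parameter in the class's config mapping and assign it onto
    the instance, replacing the removed _config_mapping boilerplate."""
    sig = inspect.signature(cls.__init__)
    names = [
        n
        for n, p in sig.parameters.items()
        if n != "self" and p.kind not in (p.VAR_POSITIONAL, p.VAR_KEYWORD)
    ]
    cls._config_mapping = {
        **getattr(cls, "_config_mapping", {}),
        **{n: n for n in names},
    }
    original_init = cls.__init__

    def wrapped_init(self, *args, **kwargs):
        bound = sig.bind(self, *args, **kwargs)
        bound.apply_defaults()
        for n in names:
            setattr(self, n, bound.arguments[n])  # e.g. self.scaling_factor = ...
        original_init(*bound.args, **bound.kwargs)

    cls.__init__ = wrapped_init
    return cls
```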
fusion_bench/method/we_moe/clip_we_moe.py
CHANGED

@@ -2,6 +2,7 @@ import functools
 import logging
 import os
 from copy import deepcopy
+from typing import Any, Iterator
 
 import torch
 from torch import Tensor
@@ -38,7 +39,7 @@ class CLIPWeightEnsemblingMoEAlgorithm(
 
     modelpool: CLIPVisionModelPool = None
 
-    def load_checkpoint(self, model, checkpoint):
+    def load_checkpoint(self, model: Any, checkpoint: Any):
         """
         Load the checkpoint file.
 
@@ -49,7 +50,7 @@ class CLIPWeightEnsemblingMoEAlgorithm(
         state = {"model": model}
         self._fabric.load(checkpoint, state)
 
-    def save_checkpoint(self, model, checkpoint):
+    def save_checkpoint(self, model: Any, checkpoint: Any):
         """
         Save the checkpoint file.
 
@@ -102,7 +103,7 @@ class CLIPWeightEnsemblingMoEAlgorithm(
         return moe_model
 
     @functools.cache
-    def get_shuffled_test_loader_iter(self, tta_dataset: str):
+    def get_shuffled_test_loader_iter(self, tta_dataset: str) -> Iterator:
         """
         Get an iterator for the shuffled test data loader.
 
@@ -131,7 +132,7 @@ class CLIPWeightEnsemblingMoEAlgorithm(
         """
         self.setup_zero_shot_classification_head()
 
-    def compute_logits(self, module, batch, task) -> Tensor:
+    def compute_logits(self, module: Any, batch: Any, task: Any) -> Tensor:
         """
         Compute the logits for the given batch and task.
 
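One detail worth noting above: `get_shuffled_test_loader_iter` keeps its `@functools.cache` decorator, so repeated calls with the same dataset name return the same iterator object, presumably so that successive test-time-adaptation steps keep drawing batches from one shuffled stream instead of rebuilding a DataLoader each step. A self-contained illustration of that caching behavior (the `Example` class and fake batches are hypothetical stand-ins):

```python
import functools
from itertools import cycle


class Example:
    @functools.cache  # memoized per (self, tta_dataset) pair
    def get_shuffled_test_loader_iter(self, tta_dataset: str):
        # stand-in for an infinite iterator over a shuffled DataLoader
        return iter(cycle(f"{tta_dataset}-batch-{i}" for i in range(3)))


ex = Example()
it = ex.get_shuffled_test_loader_iter("mnist")
assert it is ex.get_shuffled_test_loader_iter("mnist")  # cached: same object
print(next(it), next(it))  # mnist-batch-0 mnist-batch-1
```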
fusion_bench/method/we_moe/we_moe.py
CHANGED

@@ -1,6 +1,6 @@
 import logging
 from abc import abstractmethod
-from typing import cast  # noqa: F401
+from typing import Any, cast  # noqa: F401
 
 import lightning as L
 import lightning.fabric.wrappers
@@ -70,7 +70,7 @@ class WeightEnsemblingMoEAlgorithm(
         assert "No CUDA device available."
 
     @abstractmethod
-    def load_checkpoint(self, model, checkpoint):
+    def load_checkpoint(self, model: Any, checkpoint: Any):
         """
         Load the checkpoint file.
 
@@ -81,7 +81,7 @@ class WeightEnsemblingMoEAlgorithm(
         pass
 
     @abstractmethod
-    def save_checkpoint(self, model, checkpoint):
+    def save_checkpoint(self, model: Any, checkpoint: Any):
         """
         Save the checkpoint file.
 
@@ -121,7 +121,7 @@ class WeightEnsemblingMoEAlgorithm(
         pass
 
     @abstractmethod
-    def compute_logits(self, module, batch, task) -> Tensor:
+    def compute_logits(self, module: Any, batch: Any, task: Any) -> Tensor:
         """
         Compute the logits for a given batch and task.
 
@@ -135,7 +135,7 @@ class WeightEnsemblingMoEAlgorithm(
         """
         pass
 
-    def test_time_adaptation(self, module: WeightEnsemblingMoE):
+    def test_time_adaptation(self, module: WeightEnsemblingMoE) -> WeightEnsemblingMoE:
         """
         Perform test-time adaptation for the given module.
 
@@ -208,7 +208,7 @@ class WeightEnsemblingMoEAlgorithm(
 
         return module
 
-    def run(self, modelpool: ModelPool):
+    def run(self, modelpool: ModelPool) -> WeightEnsemblingMoE:
         """
         Run the WeightEnsemblingMoEAlgorithm to fuse models using Weight Ensembling Mixture of Experts.
 
fusion_bench/method/weighted_average/llama.py
CHANGED

@@ -3,6 +3,7 @@ from typing import List, Mapping, Union  # noqa: F401
 
 import numpy as np
 import torch
+from transformers import PreTrainedModel
 from typing_extensions import override
 
 from fusion_bench.method import BaseAlgorithm
@@ -10,24 +11,17 @@ from fusion_bench.modelpool import CausalLMPool
 from fusion_bench.utils import timeit_context
 from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_mul
 from fusion_bench.utils.type import StateDictType
+from fusion_bench.mixins import auto_register_config
 
 log = logging.getLogger(__name__)
 
 
+@auto_register_config
 class WeightedAverageForLLama(BaseAlgorithm):
     """
     A class to perform weighted averaging of LlaMa/Mistral models.
     """
 
-    _config_mapping = BaseAlgorithm._config_mapping | {
-        "normalize": "normalize",
-        "weights": "weights",
-        "backbone_only": "backbone_only",
-        "merged_model_save_path": "merged_model_save_path",
-        "save_tokenizer": "save_tokenizer",
-        "push_to_hub": "push_to_hub",
-    }
-
     def __init__(
         self,
         normalize: bool,
@@ -49,17 +43,11 @@ class WeightedAverageForLLama(BaseAlgorithm):
             save_tokenizer (bool): Whether to save the tokenizer.
             push_to_hub (bool): Whether to push the model to the hub.
         """
-        self.normalize = normalize
-        self.weights = weights
-        self.backbone_only = backbone_only
-        self.merged_model_save_path = merged_model_save_path
-        self.save_tokenizer = save_tokenizer
-        self.push_to_hub = push_to_hub
         super().__init__(**kwargs)
 
     @override
     @torch.no_grad()
-    def run(self, modelpool: CausalLMPool):
+    def run(self, modelpool: CausalLMPool) -> PreTrainedModel:
         """
         Executes the weighted averaging of models in the provided model pool.
 
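For context, the weighted average itself rests on the `state_dict_add` / `state_dict_mul` helpers imported above, whose exact signatures this diff does not show. A plausible, self-contained sketch of the core computation (hypothetical code, not the class's actual `run` body):

```python
import torch


def weighted_average(state_dicts, weights, normalize=True):
    # Optionally normalize the weights to sum to 1, then combine the
    # state dicts key by key: merged = sum_i w_i * sd_i.
    if normalize:
        total = sum(weights)
        weights = [w / total for w in weights]
    merged = {k: torch.zeros_like(v) for k, v in state_dicts[0].items()}
    for sd, w in zip(state_dicts, weights):
        for k in merged:
            merged[k] += w * sd[k]
    return merged


a = {"w": torch.tensor([1.0, 2.0])}
b = {"w": torch.tensor([3.0, 4.0])}
print(weighted_average([a, b], weights=[1.0, 3.0])["w"])  # tensor([2.5000, 3.5000])
```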
fusion_bench/metrics/continual_learning/__init__.py
CHANGED

@@ -0,0 +1 @@
+from .backward_transfer import compute_backward_transfer
fusion_bench/metrics/continual_learning/backward_transfer.py
CHANGED

@@ -10,7 +10,7 @@ def compute_backward_transfer(
     Compute the backward transfer (BWT) of a model on a set of tasks.
 
     Equation:
-        BWT = \frac{1}{n} \sum_{k=1}^{n} (acc_{
+        $BWT = \frac{1}{n} \sum_{k=1}^{n} (acc_{T,i}[k] - acc_{i,i}[k])$
 
     Returns:
         float: The backward transfer of the model.
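Under the conventional continual-learning reading of the indices (an assumption here, since the diff shows only this one docstring line), the fixed equation compares the final model's accuracy on each earlier task with the accuracy measured right after that task was learned, so a negative BWT indicates forgetting. A small worked example:

```python
# acc[i][k]: accuracy on task k, evaluated after training on task i
acc = [
    [0.90, 0.00, 0.00],  # after task 0
    [0.85, 0.92, 0.00],  # after task 1
    [0.80, 0.88, 0.95],  # after task 2 (final model)
]
n = 2  # earlier tasks compared against the final model
bwt = sum(acc[-1][k] - acc[k][k] for k in range(n)) / n
print(round(bwt, 4))  # ((0.80 - 0.90) + (0.88 - 0.92)) / 2 = -0.07
```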
fusion_bench/metrics/nyuv2/__init__.py
CHANGED

@@ -1,10 +1,10 @@
 from .depth import DepthMetric
 from .noise import NoiseMetric
 from .normal import NormalMetric
-from .segmentation import
+from .segmentation import SegmentationMetric
 
 metric_classes = {
-    "segmentation":
+    "segmentation": SegmentationMetric,
     "depth": DepthMetric,
     "normal": NormalMetric,
     "noise": NoiseMetric,
fusion_bench/mixins/__init__.py
CHANGED
@@ -11,7 +11,11 @@ _import_structure = {
     "hydra_config": ["HydraConfigMixin"],
     "lightning_fabric": ["LightningFabricMixin"],
     "openclip_classification": ["OpenCLIPClassificationMixin"],
-    "serialization": [
+    "serialization": [
+        "BaseYAMLSerializable",
+        "YAMLSerializationMixin",
+        "auto_register_config",
+    ],
     "simple_profiler": ["SimpleProfilerMixin"],
 }
 
@@ -21,7 +25,11 @@ if TYPE_CHECKING:
     from .hydra_config import HydraConfigMixin
     from .lightning_fabric import LightningFabricMixin
     from .openclip_classification import OpenCLIPClassificationMixin
-    from .serialization import
+    from .serialization import (
+        BaseYAMLSerializable,
+        YAMLSerializationMixin,
+        auto_register_config,
+    )
     from .simple_profiler import SimpleProfilerMixin
 else:
     sys.modules[__name__] = LazyImporter(
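Both hunks feed the same mechanism: `_import_structure` declares which names each submodule exports, the `if TYPE_CHECKING:` branch imports them eagerly for static analysis only, and at runtime the module is replaced by a `LazyImporter` that defers the real imports until a name is first accessed. FusionBench's actual `LazyImporter` is not shown in this diff; a minimal hypothetical sketch of the pattern:

```python
import importlib
import types


class LazyImporter(types.ModuleType):
    """Hypothetical sketch: resolve exported names to their submodules
    on first attribute access, then cache the result."""

    def __init__(self, name, module_file, import_structure):
        super().__init__(name)
        self.__file__ = module_file
        # reverse map: exported name -> submodule that defines it
        self._name_to_module = {
            attr: mod for mod, attrs in import_structure.items() for attr in attrs
        }

    def __getattr__(self, name):
        if name in self._name_to_module:
            module = importlib.import_module(
                "." + self._name_to_module[name], self.__name__
            )
            value = getattr(module, name)
            setattr(self, name, value)  # cache for subsequent lookups
            return value
        raise AttributeError(f"module {self.__name__!r} has no attribute {name!r}")


# usage (as in the hunk above):
# sys.modules[__name__] = LazyImporter(__name__, __file__, _import_structure)
```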
fusion_bench/mixins/clip_classification.py
CHANGED

@@ -6,6 +6,7 @@ from typing import (  # noqa: F401
     TYPE_CHECKING,
     Any,
     Dict,
+    Iterator,
     List,
     Optional,
     Tuple,
@@ -48,7 +49,7 @@ class CLIPClassificationMixin(LightningFabricMixin):
     - `zeroshot_weights_cache_dir` (Optional[str]): The directory to cache the zero-shot weights.
     """
 
-
+    dataloader_kwargs: Dict[str, Any] = {}
     # the modelpool is set by inheriting class
     modelpool: CLIPVisionModelPool = None
     _clip_processor: CLIPProcessor = None
@@ -71,7 +72,7 @@ class CLIPClassificationMixin(LightningFabricMixin):
         batch_size: Optional[int] = None,
         num_workers: Optional[int] = None,
         **loader_kwargs,
-    ):
+    ) -> Iterator:
         """
         Get an iterator for a shuffled test DataLoader.
 
@@ -89,7 +90,7 @@ class CLIPClassificationMixin(LightningFabricMixin):
             Iterator: An iterator over the shuffled test DataLoader.
         """
         # get dataloader kwargs
-        dataloader_kwargs = self.
+        dataloader_kwargs = self.dataloader_kwargs.copy()
         dataloader_kwargs["shuffle"] = True
         if batch_size is not None:
             dataloader_kwargs["batch_size"] = batch_size
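The last hunk is a genuine bug fix rather than a type-hint change: `dataloader_kwargs` is now a class-level dict shared by every instance, so it must be copied before the method sets `shuffle` and `batch_size`; mutating it in place would leak one call's overrides into every later call. A self-contained illustration of the idiom (hypothetical class):

```python
class Mixin:
    dataloader_kwargs = {}  # class-level default, shared by all instances

    def loader_kwargs(self, batch_size=None):
        kwargs = self.dataloader_kwargs.copy()  # mutate a copy, not the shared dict
        kwargs["shuffle"] = True
        if batch_size is not None:
            kwargs["batch_size"] = batch_size
        return kwargs


m = Mixin()
m.loader_kwargs(batch_size=64)
assert Mixin.dataloader_kwargs == {}  # the shared default stays pristine
```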
fusion_bench/mixins/hydra_config.py
CHANGED

@@ -1,8 +1,20 @@
+"""
+Hydra Configuration Mixin for FusionBench.
+
+This module provides a mixin class that enables easy instantiation of objects
+from Hydra configuration files. It's designed to work seamlessly with the
+FusionBench configuration system and supports dynamic object creation based
+on YAML configuration files.
+
+The mixin integrates with Hydra's configuration management system to provide
+a clean interface for creating objects from structured configurations.
+"""
+
 import logging
 import os
 from copy import deepcopy
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, TypeVar, Union
 
 import hydra.core.global_hydra
 from hydra import compose, initialize
@@ -13,10 +25,39 @@ from fusion_bench.utils.instantiate_utils import set_print_function_call
 
 log = logging.getLogger(__name__)
 
+T = TypeVar("T", bound="HydraConfigMixin")
+
 
 class HydraConfigMixin:
-    """
-    A mixin
+    R"""
+    A mixin class that provides configuration-based instantiation capabilities.
+
+    This mixin enables classes to be instantiated directly from Hydra configuration
+    files, supporting both direct instantiation and target-based instantiation patterns.
+    It's particularly useful in FusionBench for creating model pools, task pools,
+    and fusion algorithms from YAML configurations.
+
+    The mixin handles:
+    - Configuration loading and composition
+    - Target class validation
+    - Nested configuration group navigation
+    - Object instantiation with proper error handling
+
+    Example:
+
+        ```python
+        class MyAlgorithm(HydraConfigMixin):
+            def __init__(self, param1: str, param2: int = 10):
+                self.param1 = param1
+                self.param2 = param2
+
+        # Instantiate from config
+        algorithm = MyAlgorithm.from_config("algorithms/my_algorithm")
+        ```
+
+    Note:
+        This mixin requires Hydra to be properly initialized before use.
+        Typically, this is handled by the main FusionBench CLI application.
     """
 
     @classmethod
@@ -24,26 +65,83 @@ class HydraConfigMixin:
         cls,
         config_name: Union[str, Path],
         overrides: Optional[List[str]] = None,
-    ):
+    ) -> T:
+        """
+        Create an instance of the class from a Hydra configuration.
+
+        This method loads a Hydra configuration file and instantiates the class
+        using the configuration parameters. It supports both direct parameter
+        passing and target-based instantiation patterns.
+
+        Args:
+            config_name: The name/path of the configuration file to load.
+                Can be a string like "algorithms/simple_average" or
+                a Path object. The .yaml extension is optional.
+            overrides: Optional list of configuration overrides in the format
+                ["key=value", "nested.key=value"]. These allow runtime
+                modification of configuration parameters.
+
+        Returns:
+            An instance of the class configured according to the loaded configuration.
+
+        Raises:
+            RuntimeError: If Hydra is not properly initialized.
+            ImportError: If a target class specified in the config cannot be imported.
+            ValueError: If required configuration parameters are missing.
+
+        Example:
+            ```python
+            # Load with basic config
+            obj = MyClass.from_config("my_config")
+
+            # Load with overrides
+            obj = MyClass.from_config(
+                "my_config",
+                overrides=["param1=new_value", "param2=42"]
+            )
+
+            # Load nested config
+            obj = MyClass.from_config("category/subcategory/my_config")
+            ```
+
+        Note:
+            The method automatically handles nested configuration groups by
+            navigating through the configuration hierarchy based on the
+            config_name path structure.
+        """
+        # Verify Hydra initialization
         if not hydra.core.global_hydra.GlobalHydra.instance().is_initialized():
-            raise RuntimeError(
+            raise RuntimeError(
+                "Hydra is not initialized. Please ensure Hydra is properly "
+                "initialized before calling from_config(). This is typically "
+                "handled by the FusionBench CLI application."
+            )
         else:
+            # Compose the configuration with any provided overrides
             cfg = compose(config_name=config_name, overrides=overrides)
 
+        # Navigate through nested configuration groups
+        # E.g., "algorithms/simple_average" -> navigate to cfg.algorithms
        config_groups = config_name.split("/")[:-1]
        for config_group in config_groups:
            cfg = cfg[config_group]
 
+        # Handle target-based instantiation
        if "_target_" in cfg:
-            #
+            # Validate that the target class matches the calling class
            target_cls = import_object(cfg["_target_"])
            if target_cls != cls:
                log.warning(
-                    f"
+                    f"Configuration target mismatch: config specifies "
+                    f"'{cfg['_target_']}' but called on class '{cls.__name__}'. "
+                    f"This may indicate a configuration error."
                )
+
+            # Instantiate using the target pattern with function call logging disabled
            with set_print_function_call(False):
                obj = instantiate(cfg)
        else:
+            # Direct instantiation using configuration as keyword arguments
            obj = cls(**cfg)
 
        return obj
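A small typing note on this file: the new `T = TypeVar("T", bound="HydraConfigMixin")` together with the `-> T` return annotation is what lets type checkers report `MyPool.from_config(...)` as returning `MyPool` rather than the base mixin; for the variable to bind, `cls` is conventionally annotated as `type[T]`. A minimal illustration with hypothetical names:

```python
from typing import List, Optional, TypeVar

T = TypeVar("T", bound="Configurable")


class Configurable:
    @classmethod
    def from_config(
        cls: "type[T]",
        config_name: str,
        overrides: Optional[List[str]] = None,
    ) -> T:
        # stand-in for: cfg = compose(config_name, overrides); return cls(**cfg)
        return cls()


class MyPool(Configurable):
    pass


pool = MyPool.from_config("modelpool/my_pool")  # inferred as MyPool
assert isinstance(pool, MyPool)
```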
fusion_bench/mixins/lightning_fabric.py
CHANGED

@@ -52,9 +52,11 @@ class LightningFabricMixin:
     and nodes, with support for custom logging via TensorBoard.
 
     Attributes:
+
     - _fabric (L.Fabric): The Lightning Fabric instance used for distributed computing.
 
     Note:
+
     This mixin is designed to be used with classes that require distributed computing capabilities and wish to
     leverage the Lightning Fabric for this purpose. It assumes the presence of a `config` attribute or parameter
     in the consuming class for configuration.