fusion-bench 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. fusion_bench/__init__.py +22 -2
  2. fusion_bench/_get_started/__init__.py +3 -0
  3. fusion_bench/_get_started/greeting_program.py +49 -0
  4. fusion_bench/compat/method/base_algorithm.py +14 -0
  5. fusion_bench/constants/__init__.py +6 -0
  6. fusion_bench/constants/clip_vision.py +26 -2
  7. fusion_bench/constants/paths.py +4 -0
  8. fusion_bench/constants/runtime.py +57 -0
  9. fusion_bench/dataset/clip_dataset.py +2 -1
  10. fusion_bench/dataset/gpt2_glue.py +9 -9
  11. fusion_bench/dataset/image_corruption/__init__.py +0 -0
  12. fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
  13. fusion_bench/dataset/image_dataset.py +1 -1
  14. fusion_bench/dataset/nyuv2.py +2 -2
  15. fusion_bench/method/__init__.py +24 -5
  16. fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
  17. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
  18. fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
  19. fusion_bench/method/base_algorithm.py +195 -12
  20. fusion_bench/method/bitdelta/__init__.py +5 -0
  21. fusion_bench/method/bitdelta/bitdelta.py +156 -0
  22. fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
  23. fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
  24. fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
  25. fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
  26. fusion_bench/method/classification/clip_finetune.py +1 -1
  27. fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
  28. fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
  29. fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
  30. fusion_bench/method/doge_ta/doge_ta.py +1 -1
  31. fusion_bench/method/ensemble.py +12 -12
  32. fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
  33. fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -6
  34. fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
  35. fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
  36. fusion_bench/method/fw_merging/fw_hard.py +1 -1
  37. fusion_bench/method/fw_merging/fw_soft.py +1 -1
  38. fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
  39. fusion_bench/method/linear/expo.py +2 -1
  40. fusion_bench/method/linear/linear_interpolation.py +6 -4
  41. fusion_bench/method/linear/simple_average_for_llama.py +17 -13
  42. fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
  43. fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
  44. fusion_bench/method/model_recombination.py +2 -5
  45. fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
  46. fusion_bench/method/moe_pruner/utils/data.py +2 -1
  47. fusion_bench/method/moe_pruner/utils/prune.py +6 -1
  48. fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
  49. fusion_bench/method/pruning/wanda_utils/data.py +1 -2
  50. fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
  51. fusion_bench/method/randes/modelsoup.py +1 -3
  52. fusion_bench/method/regmean/clip_regmean.py +2 -2
  53. fusion_bench/method/regmean/gpt2_regmean.py +3 -10
  54. fusion_bench/method/regmean/regmean.py +2 -11
  55. fusion_bench/method/regmean_plusplus/__init__.py +1 -1
  56. fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +24 -17
  57. fusion_bench/method/regmean_plusplus/regmean_plusplus.py +56 -38
  58. fusion_bench/method/simple_average.py +12 -16
  59. fusion_bench/method/slerp/slerp.py +5 -2
  60. fusion_bench/method/smile_upscaling/causal_lm_upscaling.py +371 -0
  61. fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
  62. fusion_bench/method/smile_upscaling/projected_energy.py +144 -0
  63. fusion_bench/method/smile_upscaling/smile_mistral_upscaling.py +5 -1
  64. fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +71 -51
  65. fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
  66. fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
  67. fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
  68. fusion_bench/method/ties_merging/ties_merging.py +13 -26
  69. fusion_bench/method/we_moe/__init__.py +1 -0
  70. fusion_bench/method/we_moe/clip_we_moe.py +5 -4
  71. fusion_bench/method/we_moe/entropy_loss.py +25 -0
  72. fusion_bench/method/we_moe/flan_t5_we_moe.py +331 -0
  73. fusion_bench/method/we_moe/utils.py +15 -0
  74. fusion_bench/method/we_moe/we_moe.py +6 -6
  75. fusion_bench/method/weighted_average/llama.py +4 -16
  76. fusion_bench/metrics/continual_learning/__init__.py +1 -0
  77. fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
  78. fusion_bench/metrics/nyuv2/__init__.py +2 -2
  79. fusion_bench/metrics/nyuv2/segmentation.py +1 -1
  80. fusion_bench/mixins/__init__.py +10 -2
  81. fusion_bench/mixins/clip_classification.py +15 -45
  82. fusion_bench/mixins/hydra_config.py +105 -7
  83. fusion_bench/mixins/lightning_fabric.py +2 -0
  84. fusion_bench/mixins/serialization.py +275 -48
  85. fusion_bench/modelpool/__init__.py +2 -2
  86. fusion_bench/modelpool/base_pool.py +29 -9
  87. fusion_bench/modelpool/causal_lm/causal_lm.py +41 -33
  88. fusion_bench/modelpool/clip_vision/modelpool.py +1 -3
  89. fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
  90. fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
  91. fusion_bench/models/__init__.py +7 -1
  92. fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
  93. fusion_bench/models/hf_utils.py +160 -0
  94. fusion_bench/models/linearized/linearized_model_utils.py +4 -4
  95. fusion_bench/models/linearized/vision_model.py +1 -1
  96. fusion_bench/models/model_card_templates/default.md +46 -0
  97. fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
  98. fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
  99. fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
  100. fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
  101. fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
  102. fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
  103. fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
  104. fusion_bench/models/modeling_smile_llama/__init__.py +7 -0
  105. fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
  106. fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +698 -0
  107. fusion_bench/models/modeling_smile_llama/register.py +8 -0
  108. fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
  109. fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
  110. fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +7 -12
  111. fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
  112. fusion_bench/models/parameter_dict.py +1 -1
  113. fusion_bench/models/sparse_we_moe.py +1 -53
  114. fusion_bench/models/utils.py +26 -0
  115. fusion_bench/models/we_moe.py +1 -53
  116. fusion_bench/models/wrappers/ensemble.py +6 -4
  117. fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
  118. fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
  119. fusion_bench/programs/base_program.py +81 -2
  120. fusion_bench/programs/fabric_fusion_program.py +46 -61
  121. fusion_bench/scripts/cli.py +38 -5
  122. fusion_bench/taskpool/base_pool.py +4 -3
  123. fusion_bench/taskpool/clip_vision/taskpool.py +43 -22
  124. fusion_bench/taskpool/dummy.py +1 -1
  125. fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
  126. fusion_bench/tasks/clip_classification/__init__.py +6 -4
  127. fusion_bench/utils/__init__.py +7 -1
  128. fusion_bench/utils/cache_utils.py +101 -1
  129. fusion_bench/utils/devices.py +14 -4
  130. fusion_bench/utils/fabric.py +2 -2
  131. fusion_bench/utils/instantiate_utils.py +3 -1
  132. fusion_bench/utils/lazy_imports.py +23 -0
  133. fusion_bench/utils/lazy_state_dict.py +38 -3
  134. fusion_bench/utils/modelscope.py +127 -8
  135. fusion_bench/utils/parameters.py +2 -2
  136. fusion_bench/utils/path.py +56 -0
  137. fusion_bench/utils/pylogger.py +1 -1
  138. fusion_bench/utils/rich_utils.py +3 -0
  139. fusion_bench/utils/state_dict_arithmetic.py +25 -23
  140. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/METADATA +24 -47
  141. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/RECORD +184 -145
  142. fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
  143. fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
  144. fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
  145. fusion_bench_config/_get_started/greeting_program.yaml +4 -0
  146. fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
  147. fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
  148. fusion_bench_config/fabric_model_fusion.yaml +45 -17
  149. fusion_bench_config/hydra/default.yaml +6 -2
  150. fusion_bench_config/llama_full_finetune.yaml +1 -0
  151. fusion_bench_config/method/adamerging/clip.yaml +1 -1
  152. fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
  153. fusion_bench_config/method/depth_upscaling.yaml +4 -1
  154. fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml +0 -1
  155. fusion_bench_config/method/linear/simple_average_for_llama.yaml +3 -2
  156. fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +21 -0
  157. fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
  158. fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
  159. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +2 -1
  160. fusion_bench_config/method/wemoe/flan_t5_weight_ensembling_moe.yaml +20 -0
  161. fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
  162. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
  163. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +1 -1
  164. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -6
  165. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +1 -1
  166. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +1 -1
  167. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +3 -3
  168. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
  169. fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
  170. fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
  171. fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
  172. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
  173. fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
  174. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
  175. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
  176. fusion_bench_config/nyuv2_config.yaml +3 -1
  177. fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
  178. fusion_bench_config/path/default.yaml +28 -0
  179. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
  180. fusion_bench_config/method/adamerging.yaml +0 -23
  181. fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
  182. fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
  183. fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
  184. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/WHEEL +0 -0
  185. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/entry_points.txt +0 -0
  186. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/licenses/LICENSE +0 -0
  187. {fusion_bench-0.2.20.dist-info → fusion_bench-0.2.22.dist-info}/top_level.txt +0 -0
  188. /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
fusion_bench/mixins/hydra_config.py
@@ -1,8 +1,20 @@
+"""
+Hydra Configuration Mixin for FusionBench.
+
+This module provides a mixin class that enables easy instantiation of objects
+from Hydra configuration files. It's designed to work seamlessly with the
+FusionBench configuration system and supports dynamic object creation based
+on YAML configuration files.
+
+The mixin integrates with Hydra's configuration management system to provide
+a clean interface for creating objects from structured configurations.
+"""
+
 import logging
 import os
 from copy import deepcopy
 from pathlib import Path
-from typing import Dict, List, Optional, Union
+from typing import Dict, List, Optional, TypeVar, Union

 import hydra.core.global_hydra
 from hydra import compose, initialize
@@ -13,10 +25,39 @@ from fusion_bench.utils.instantiate_utils import set_print_function_call

 log = logging.getLogger(__name__)

+T = TypeVar("T", bound="HydraConfigMixin")
+

 class HydraConfigMixin:
-    """
-    A mixin for classes that need to be instantiated from a config file.
+    R"""
+    A mixin class that provides configuration-based instantiation capabilities.
+
+    This mixin enables classes to be instantiated directly from Hydra configuration
+    files, supporting both direct instantiation and target-based instantiation patterns.
+    It's particularly useful in FusionBench for creating model pools, task pools,
+    and fusion algorithms from YAML configurations.
+
+    The mixin handles:
+    - Configuration loading and composition
+    - Target class validation
+    - Nested configuration group navigation
+    - Object instantiation with proper error handling
+
+    Example:
+
+    ```python
+    class MyAlgorithm(HydraConfigMixin):
+        def __init__(self, param1: str, param2: int = 10):
+            self.param1 = param1
+            self.param2 = param2
+
+    # Instantiate from config
+    algorithm = MyAlgorithm.from_config("algorithms/my_algorithm")
+    ```
+
+    Note:
+        This mixin requires Hydra to be properly initialized before use.
+        Typically, this is handled by the main FusionBench CLI application.
     """

     @classmethod
@@ -24,26 +65,83 @@ class HydraConfigMixin:
         cls,
         config_name: Union[str, Path],
         overrides: Optional[List[str]] = None,
-    ):
+    ) -> T:
+        """
+        Create an instance of the class from a Hydra configuration.
+
+        This method loads a Hydra configuration file and instantiates the class
+        using the configuration parameters. It supports both direct parameter
+        passing and target-based instantiation patterns.
+
+        Args:
+            config_name: The name/path of the configuration file to load.
+                Can be a string like "algorithms/simple_average" or
+                a Path object. The .yaml extension is optional.
+            overrides: Optional list of configuration overrides in the format
+                ["key=value", "nested.key=value"]. These allow runtime
+                modification of configuration parameters.
+
+        Returns:
+            An instance of the class configured according to the loaded configuration.
+
+        Raises:
+            RuntimeError: If Hydra is not properly initialized.
+            ImportError: If a target class specified in the config cannot be imported.
+            ValueError: If required configuration parameters are missing.
+
+        Example:
+            ```python
+            # Load with basic config
+            obj = MyClass.from_config("my_config")
+
+            # Load with overrides
+            obj = MyClass.from_config(
+                "my_config",
+                overrides=["param1=new_value", "param2=42"]
+            )
+
+            # Load nested config
+            obj = MyClass.from_config("category/subcategory/my_config")
+            ```
+
+        Note:
+            The method automatically handles nested configuration groups by
+            navigating through the configuration hierarchy based on the
+            config_name path structure.
+        """
+        # Verify Hydra initialization
         if not hydra.core.global_hydra.GlobalHydra.instance().is_initialized():
-            raise RuntimeError("Hydra is not initialized.")
+            raise RuntimeError(
+                "Hydra is not initialized. Please ensure Hydra is properly "
+                "initialized before calling from_config(). This is typically "
+                "handled by the FusionBench CLI application."
+            )
         else:
+            # Compose the configuration with any provided overrides
             cfg = compose(config_name=config_name, overrides=overrides)

+            # Navigate through nested configuration groups
+            # E.g., "algorithms/simple_average" -> navigate to cfg.algorithms
             config_groups = config_name.split("/")[:-1]
             for config_group in config_groups:
                 cfg = cfg[config_group]

+        # Handle target-based instantiation
         if "_target_" in cfg:
-            # if the config has a _target_ key, check if it is equal to the class name
+            # Validate that the target class matches the calling class
             target_cls = import_object(cfg["_target_"])
             if target_cls != cls:
                 log.warning(
-                    f"The _target_ key in the config is {cfg['_target_']}, but the class name is {cls.__name__}."
+                    f"Configuration target mismatch: config specifies "
+                    f"'{cfg['_target_']}' but called on class '{cls.__name__}'. "
+                    f"This may indicate a configuration error."
                 )
+
+            # Instantiate using the target pattern with function call logging disabled
             with set_print_function_call(False):
                 obj = instantiate(cfg)
         else:
+            # Direct instantiation using configuration as keyword arguments
             obj = cls(**cfg)

         return obj
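
The new `from_config` classmethod composes a config, descends into its config group, and then instantiates either via `_target_` or by calling the class directly. A minimal sketch of driving it outside the FusionBench CLI (which normally initializes Hydra itself) might look like the following; the `conf/` directory and `algorithms/my_algorithm.yaml` config are hypothetical stand-ins, not files shipped in this release:

```python
from hydra import initialize

from fusion_bench.mixins import HydraConfigMixin


class MyAlgorithm(HydraConfigMixin):
    def __init__(self, param1: str, param2: int = 10):
        self.param1 = param1
        self.param2 = param2


# Hypothetical config directory containing algorithms/my_algorithm.yaml.
with initialize(version_base=None, config_path="conf"):
    # Composes the config, walks into the "algorithms" group, then either
    # instantiates via _target_ or calls MyAlgorithm(**cfg) directly.
    algorithm = MyAlgorithm.from_config(
        "algorithms/my_algorithm",
        overrides=["param2=42"],
    )
```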
fusion_bench/mixins/lightning_fabric.py
@@ -52,9 +52,11 @@ class LightningFabricMixin:
     and nodes, with support for custom logging via TensorBoard.

     Attributes:
+
         - _fabric (L.Fabric): The Lightning Fabric instance used for distributed computing.

     Note:
+
         This mixin is designed to be used with classes that require distributed computing capabilities and wish to
         leverage the Lightning Fabric for this purpose. It assumes the presence of a `config` attribute or parameter
         in the consuming class for configuration.
fusion_bench/mixins/serialization.py
@@ -1,20 +1,158 @@
+import inspect
 import logging
+from copy import deepcopy
+from functools import wraps
+from inspect import Parameter, _ParameterKind
 from pathlib import Path
 from typing import Dict, Optional, Union

-from omegaconf import OmegaConf
+from omegaconf import DictConfig, OmegaConf

+from fusion_bench.constants import FUSION_BENCH_VERSION
 from fusion_bench.utils import import_object, instantiate
+from fusion_bench.utils.instantiate_utils import set_print_function_call

 log = logging.getLogger(__name__)

+__all__ = [
+    "YAMLSerializationMixin",
+    "auto_register_config",
+    "BaseYAMLSerializable",
+]
+
+
+def auto_register_config(cls):
+    """
+    Decorator to automatically register __init__ parameters in _config_mapping.
+
+    This decorator enhances classes that inherit from YAMLSerializationMixin by
+    automatically mapping constructor parameters to configuration keys and
+    dynamically setting instance attributes based on provided arguments.
+
+    The decorator performs the following operations:
+    1. Inspects the class's __init__ method signature
+    2. Automatically populates the _config_mapping dictionary with parameter names
+    3. Wraps the __init__ method to handle both positional and keyword arguments
+    4. Sets instance attributes for all constructor parameters
+    5. Applies default values when parameters are not provided
+
+    Args:
+        cls (YAMLSerializationMixin): The class to be decorated. Must inherit from
+            YAMLSerializationMixin to ensure proper serialization capabilities.
+
+    Returns:
+        YAMLSerializationMixin: The decorated class with enhanced auto-registration
+            functionality and modified __init__ behavior.
+
+    Behavior:
+        - **Parameter Registration**: All non-variadic parameters (excluding *args, **kwargs)
+          from the __init__ method are automatically added to _config_mapping
+        - **Positional Arguments**: Handled in order and mapped to corresponding parameter names
+        - **Keyword Arguments**: Processed after positional arguments, overriding any conflicts
+        - **Default Values**: Applied when parameters are not provided via arguments
+        - **Attribute Setting**: All parameters become instance attributes accessible via dot notation
+
+    Example:
+        ```python
+        @auto_register_config
+        class MyAlgorithm(BaseYAMLSerializable):
+            def __init__(self, learning_rate: float = 0.001, batch_size: int = 32, model_name: str = "default"):
+                super().__init__()

+        # All instantiation methods work automatically:
+        algo1 = MyAlgorithm(0.01, 64)  # positional args
+        algo2 = MyAlgorithm(learning_rate=0.01, model_name="bert")  # keyword args
+        algo3 = MyAlgorithm(0.01, batch_size=128, model_name="gpt")  # mixed args
+
+        # Attributes are automatically set and can be serialized:
+        print(algo1.learning_rate)  # 0.01
+        print(algo1.batch_size)  # 64
+        print(algo1.model_name)  # "default" (from default value)
+
+        config = algo1.config
+        # DictConfig({'_target_': 'MyAlgorithm', 'learning_rate': 0.01, 'batch_size': 64, 'model_name': 'default'})
+        ```
+
+    Note:
+        - The decorator wraps the original __init__ method while preserving its signature for IDE support
+        - Parameters with *args or **kwargs signatures are ignored during registration
+        - The attributes are auto-registered, then the original __init__ method is called,
+        - Type hints, method name, and other metadata are preserved using functools.wraps
+        - This decorator is designed to work seamlessly with the YAML serialization system
+
+    Raises:
+        AttributeError: If the class does not have the required _config_mapping attribute
+            infrastructure (should inherit from YAMLSerializationMixin)
+    """
+    original_init = cls.__init__
+    sig = inspect.signature(original_init)
+
+    # Auto-register parameters in _config_mapping
+    if not "_config_mapping" in cls.__dict__:
+        cls._config_mapping = deepcopy(getattr(cls, "_config_mapping", {}))
+    registered_parameters = tuple(cls._config_mapping.values())
+
+    for param_name in list(sig.parameters.keys())[1:]:  # Skip 'self'
+        if (
+            sig.parameters[param_name].kind
+            not in [
+                _ParameterKind.VAR_POSITIONAL,
+                _ParameterKind.VAR_KEYWORD,
+            ]
+        ) and (param_name not in registered_parameters):
+            cls._config_mapping[param_name] = param_name
+
+    def __init__(self, *args, **kwargs):
+        nonlocal original_init, registered_parameters
+
+        # auto-register the attributes based on the signature
+        sig = inspect.signature(original_init)
+        param_names = list(sig.parameters.keys())[1:]  # Skip 'self'
+
+        # Handle positional arguments
+        for i, arg_value in enumerate(args):
+            if i < len(param_names):
+                param_name = param_names[i]
+                if sig.parameters[param_name].kind not in [
+                    _ParameterKind.VAR_POSITIONAL,
+                    _ParameterKind.VAR_KEYWORD,
+                ]:
+                    setattr(self, param_name, arg_value)
+
+        # Handle keyword arguments and defaults
+        for param_name in param_names:
+            if (
+                sig.parameters[param_name].kind
+                not in [
+                    _ParameterKind.VAR_POSITIONAL,
+                    _ParameterKind.VAR_KEYWORD,
+                ]
+            ) and (param_name not in registered_parameters):
+                # Skip if already set by positional argument
+                param_index = param_names.index(param_name)
+                if param_index >= 0 and param_index < len(args):
+                    continue
+
+                if param_name in kwargs:
+                    setattr(self, param_name, kwargs[param_name])
+                else:
+                    # Set default value if available and attribute doesn't exist
+                    default_value = sig.parameters[param_name].default
+                    if default_value is not Parameter.empty:
+                        setattr(self, param_name, default_value)
+
+        # Call the original __init__
+        result = original_init(self, *args, **kwargs)
+        return result
+
+    # Replace the original __init__ method while preserving its signature
+    cls.__init__ = __init__
+    return cls
+

 class YAMLSerializationMixin:
-    _recursive_: bool = False
     _config_key: Optional[str] = None
-    _config_mapping: Dict[str, str] = {
-        "_recursive_": "_recursive_",
-    }
+    _config_mapping: Dict[str, str] = {}
     R"""
     `_config_mapping` is a dictionary mapping the attribute names of the class to the config option names. This is used to convert the class to a DictConfig.

@@ -47,46 +185,50 @@ class YAMLSerializationMixin:
     By default, the `_target_` key is set to the class name as `type(self).__name__`.
     """

-    def __init__(
-        self,
-        _recursive_: bool = False,
-        **kwargs,
-    ) -> None:
-        self._recursive_ = _recursive_
+    def __init__(self, **kwargs) -> None:
         for key, value in kwargs.items():
             log.warning(f"Unused argument: {key}={value}")

     @property
-    def config(self):
+    def config(self) -> DictConfig:
         R"""
         Returns the configuration of the model pool as a DictConfig.

-        This property calls the `to_config` method to convert the model pool
-        instance into a dictionary configuration, which can be used for
-        serialization or other purposes.
+        This property converts the model pool instance into a dictionary
+        configuration, which can be used for serialization or other purposes.

         Example:
-            >>> model = SomeModelFusionAlgorithm(hyper_param_1=1, hyper_param_2=2)
-            >>> config = model.config
-            >>> print(config)
-            DictConfig({'_target_': 'SomeModelFusionAlgorithm', 'hyper_param_1': 1, 'hyper_param_2': 2})
+
+        ```python
+        model = SomeModelFusionAlgorithm(hyper_param_1=1, hyper_param_2=2)
+        config = model.config
+        print(config)
+        # DictConfig({'_target_': 'SomeModelFusionAlgorithm', 'hyper_param_1': 1, 'hyper_param_2': 2})
+        ```

         This is useful for serializing the object to a YAML file or for debugging.

         Returns:
             DictConfig: The configuration of the model pool.
         """
-        return self.to_config()
+        config = {"_target_": f"{type(self).__module__}.{type(self).__qualname__}"}
+        for attr, key in self._config_mapping.items():
+            if hasattr(self, attr):
+                config[key] = getattr(self, attr)
+
+        try:
+            return OmegaConf.create(config)
+        except Exception as e:
+            return OmegaConf.create(config, flags={"allow_objects": True})

-    def to_yaml(self, path: Union[str, Path]):
+    def to_yaml(self, path: Union[str, Path], resolve: bool = True):
         """
         Save the model pool to a YAML file.

         Args:
             path (Union[str, Path]): The path to save the model pool to.
         """
-        config = self.to_config()
-        OmegaConf.save(config, path, resolve=True)
+        OmegaConf.save(self.config, path, resolve=resolve)

     @classmethod
     def from_yaml(cls, path: Union[str, Path]):
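
Taken together with the `auto_register_config` decorator introduced above, the reworked `config` property and the `to_yaml`/`from_yaml` pair give a full serialization round trip. A rough sketch is below; `ScalingMethod` and its parameters are made up for illustration, and `auto_register_config` is imported from the module where this diff defines it (it may also be re-exported from `fusion_bench.mixins`):

```python
from fusion_bench.mixins import BaseYAMLSerializable
from fusion_bench.mixins.serialization import auto_register_config


@auto_register_config
class ScalingMethod(BaseYAMLSerializable):  # hypothetical example class
    def __init__(self, scaling_factor: float = 0.3, device: str = "cpu", **kwargs):
        super().__init__(**kwargs)


method = ScalingMethod(scaling_factor=0.5)
print(method.device)                  # "cpu", filled in from the default value
print(ScalingMethod._config_mapping)  # now contains "scaling_factor" and "device"

# The config property builds a DictConfig with a fully qualified _target_,
# to_yaml() writes it out, and from_yaml() re-instantiates the object.
method.to_yaml("scaling_method.yaml")
restored = ScalingMethod.from_yaml("scaling_method.yaml")
print(restored.scaling_factor)        # 0.5
```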
@@ -108,41 +250,126 @@ class YAMLSerializationMixin:
                 f"The class {target_cls.__name__} is not the same as the class {cls.__name__}. "
                 f"Instantiating the class {target_cls.__name__} instead."
             )
-        return instantiate(
-            config,
-            _recursive_=(
-                cls._recursive_
-                if config.get("_recursive_") is None
-                else config.get("_recursive_")
-            ),
-        )
+        with set_print_function_call(False):
+            return instantiate(config)

-    def to_config(self):
+    def register_parameter_to_config(
+        self,
+        attr_name: str,
+        param_name: str,
+        value,
+    ):
         """
-        Convert the model pool to a DictConfig.
+        Set an attribute value and register its config mapping.

-        Returns:
-            Dict: The model pool as a DictConfig.
+        This method allows dynamic setting of object attributes while simultaneously
+        updating the configuration mapping that defines how the attribute should
+        be serialized in the configuration output.
+
+        Args:
+            attr_name (str): The name of the attribute to set on this object.
+            arg_name (str): The corresponding parameter name to use in the config
+                serialization. This is how the attribute will appear in YAML output.
+            value: The value to assign to the attribute.
+
+        Example:
+            ```python
+            model = BaseYAMLSerializable()
+            model.set_option("learning_rate", "lr", 0.001)
+
+            # This sets model.learning_rate = 0.001
+            # and maps it to "lr" in the config output
+            config = model.config
+            # config will contain: {"lr": 0.001, ...}
+            ```
         """
-        config = {"_target_": type(self).__name__}
-        for attr, key in self._config_mapping.items():
-            if hasattr(self, attr):
-                config[key] = getattr(self, attr)
-        return OmegaConf.create(config)
+        setattr(self, attr_name, value)
+        self._config_mapping[attr_name] = param_name
+
+
+@auto_register_config
+class BaseYAMLSerializable(YAMLSerializationMixin):
+    """
+    A base class for YAML-serializable classes with enhanced metadata support.
+
+    This class extends `YAMLSerializationMixin` to provide additional metadata
+    fields commonly used in FusionBench classes, including usage information
+    and version tracking. It serves as a foundation for all serializable
+    model components in the framework.
+
+    The class automatically handles serialization of usage and version metadata
+    alongside the standard configuration parameters, making it easier to track
+    model provenance and intended usage patterns.

+    Attributes:
+        _usage_ (Optional[str]): Description of the model's intended usage or purpose.
+        _version_ (Optional[str]): Version information for the model or configuration.

-class BaseYAMLSerializableModel(YAMLSerializationMixin):
-    _config_mapping = YAMLSerializationMixin._config_mapping | {
-        "_usage_": "_usage_",
-        "_version_": "_version_",
-    }
+    Example:
+        ```python
+        class MyAlgorithm(BaseYAMLSerializable):
+            _config_mapping = BaseYAMLSerializable._config_mapping | {
+                "model_name": "model_name",
+                "num_layers": "num_layers",
+            }
+
+            def __init__(self, _usage_: str = None, _version_: str = None):
+                super().__init__(_usage_=_usage_, _version_=_version_)
+
+        # Usage with metadata
+        model = MyAlgorithm(
+            _usage_="Text classification fine-tuning",
+            _version_="1.0.0"
+        )
+
+        # Serialization includes metadata
+        config = model.config
+        # DictConfig({
+        #     '_target_': 'MyModel',
+        #     '_usage_': 'Text classification fine-tuning',
+        #     '_version_': '1.0.0'
+        # })
+        ```
+
+    Note:
+        The underscore prefix in `_usage_` and `_version_` follows the convention
+        for metadata fields that are not core model parameters but provide
+        important contextual information for model management and tracking.
+    """

     def __init__(
         self,
+        _recursive_: bool = False,
         _usage_: Optional[str] = None,
-        _version_: Optional[str] = None,
+        _version_: Optional[str] = FUSION_BENCH_VERSION,
         **kwargs,
     ):
+        """
+        Initialize a base YAML-serializable model with metadata support.
+
+        Args:
+            _usage_ (Optional[str], optional): Description of the model's intended
+                usage or purpose. This can include information about the training
+                domain, expected input types, or specific use cases. Defaults to None.
+            _version_ (Optional[str], optional): Version information for the model
+                or configuration. Can be used to track model iterations, dataset
+                versions, or compatibility information. Defaults to None.
+            **kwargs: Additional keyword arguments passed to the parent class.
+                Unused arguments will trigger warnings via the parent's initialization.
+
+        Example:
+            ```python
+            model = BaseYAMLSerializable(
+                _usage_="Image classification on CIFAR-10",
+                _version_="2.1.0"
+            )
+            ```
+        """
         super().__init__(**kwargs)
-        self._usage_ = _usage_
-        self._version_ = _version_
+        if _version_ != FUSION_BENCH_VERSION:
+            log.warning(
+                f"Current fusion-bench version is {FUSION_BENCH_VERSION}, but the serialized version is {_version_}. "
+                "Attempting to use current version."
+            )
+        # override _version_ with current fusion-bench version
+        self._version_ = FUSION_BENCH_VERSION
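
One behavioral change worth noting: `BaseYAMLSerializable` now defaults `_version_` to the installed `FUSION_BENCH_VERSION` and overwrites a mismatched value after logging a warning. A small sketch of what that looks like in practice, assuming the class is re-exported from `fusion_bench.mixins` as the other diffs in this release suggest:

```python
from fusion_bench.constants import FUSION_BENCH_VERSION
from fusion_bench.mixins import BaseYAMLSerializable

# Re-creating an object that was serialized by an older release: the mismatch
# is logged as a warning and _version_ is replaced with the running version.
obj = BaseYAMLSerializable(_usage_="demo", _version_="0.2.20")
assert obj._version_ == FUSION_BENCH_VERSION
print(obj.config["_version_"])  # always the currently installed fusion-bench version
```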
fusion_bench/modelpool/__init__.py
@@ -17,7 +17,7 @@ _import_structure = {
         "HuggingFaceGPT2ClassificationPool",
         "GPT2ForSequenceClassificationPool",
     ],
-    "seq_classification_lm": ["SeqenceClassificationModelPool"],
+    "seq_classification_lm": ["SequenceClassificationModelPool"],
 }


@@ -34,7 +34,7 @@ if TYPE_CHECKING:
     from .openclip_vision import OpenCLIPVisionModelPool
     from .PeftModelForSeq2SeqLM import PeftModelForSeq2SeqLMPool
     from .seq2seq_lm import Seq2SeqLMPool
-    from .seq_classification_lm import SeqenceClassificationModelPool
+    from .seq_classification_lm import SequenceClassificationModelPool

 else:
     sys.modules[__name__] = LazyImporter(
fusion_bench/modelpool/base_pool.py
@@ -1,13 +1,13 @@
 import logging
 from copy import deepcopy
-from typing import Dict, List, Optional, Union
+from typing import Dict, Generator, List, Optional, Tuple, Union

 import torch
 from omegaconf import DictConfig
 from torch import nn
 from torch.utils.data import Dataset

-from fusion_bench.mixins import BaseYAMLSerializableModel, HydraConfigMixin
+from fusion_bench.mixins import BaseYAMLSerializable, HydraConfigMixin
 from fusion_bench.utils import instantiate, timeit_context

 __all__ = ["BaseModelPool"]
@@ -15,7 +15,10 @@ __all__ = ["BaseModelPool"]
 log = logging.getLogger(__name__)


-class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
+class BaseModelPool(
+    HydraConfigMixin,
+    BaseYAMLSerializable,
+):
     """
     A class for managing and interacting with a pool of models along with their associated datasets or other specifications. For example, a model pool may contain multiple models, each with its own training, validation, and testing datasets. As for the specifications, a vision model pool may contain image preprocessor, and a language model pool may contain a tokenizer.

@@ -31,7 +34,7 @@ class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
     _program = None
     _config_key = "modelpool"
     _models: Union[DictConfig, Dict[str, nn.Module]]
-    _config_mapping = BaseYAMLSerializableModel._config_mapping | {
+    _config_mapping = BaseYAMLSerializable._config_mapping | {
         "_models": "models",
         "_train_datasets": "train_datasets",
         "_val_datasets": "val_datasets",
@@ -56,7 +59,7 @@ class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
         super().__init__(**kwargs)

     @property
-    def has_pretrained(self):
+    def has_pretrained(self) -> bool:
         """
         Check if the model pool contains a pretrained model.

@@ -125,7 +128,7 @@ class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
         return len(self.model_names)

     @staticmethod
-    def is_special_model(model_name: str):
+    def is_special_model(model_name: str) -> bool:
         """
         Determine if a model is special based on its name.

@@ -152,6 +155,23 @@ class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
             model_config = deepcopy(model_config)
         return model_config

+    def get_model_path(self, model_name: str) -> str:
+        """
+        Get the path for the specified model.
+
+        Args:
+            model_name (str): The name of the model.
+
+        Returns:
+            str: The path for the specified model.
+        """
+        if isinstance(self._models[model_name], str):
+            return self._models[model_name]
+        else:
+            raise ValueError(
+                "Model path is not a string. Try to override this method in derived modelpool class."
+            )
+
     def load_model(
         self, model_name_or_config: Union[str, DictConfig], *args, **kwargs
     ) -> nn.Module:
@@ -159,7 +179,7 @@ class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
         Load a model from the pool based on the provided configuration.

         Args:
-            model (Union[str, DictConfig]): The model name or configuration.
+            model_name_or_config (Union[str, DictConfig]): The model name or configuration.

         Returns:
             nn.Module: The instantiated model.
@@ -201,11 +221,11 @@ class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
         model = self.load_model(self.model_names[0], *args, **kwargs)
         return model

-    def models(self):
+    def models(self) -> Generator[nn.Module, None, None]:
         for model_name in self.model_names:
             yield self.load_model(model_name)

-    def named_models(self):
+    def named_models(self) -> Generator[Tuple[str, nn.Module], None, None]:
         for model_name in self.model_names:
             yield model_name, self.load_model(model_name)
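
The new `get_model_path` helper only handles the case where a pool entry is a plain string; pools whose entries are structured configs are expected to override it, as the raised `ValueError` suggests. A hedged sketch of such an override follows; the derived class and the `pretrained_model_name_or_path` key are assumptions about how a pool might store checkpoints, not something this diff defines:

```python
from fusion_bench.modelpool.base_pool import BaseModelPool


class MyStructuredModelPool(BaseModelPool):  # hypothetical derived pool
    def get_model_path(self, model_name: str) -> str:
        entry = self._models[model_name]
        if isinstance(entry, str):
            return entry
        # Assumed layout: the entry is a mapping that carries its checkpoint path.
        return entry["pretrained_model_name_or_path"]
```

The newly typed `models()` and `named_models()` generators iterate the pool lazily, e.g. `for name, model in pool.named_models(): ...`, loading each model only when it is yielded.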