fusion-bench 0.2.26__py3-none-any.whl → 0.2.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. fusion_bench/__init__.py +4 -0
  2. fusion_bench/dataset/clip_dataset.py +1 -0
  3. fusion_bench/method/__init__.py +2 -0
  4. fusion_bench/method/adamerging/__init__.py +28 -5
  5. fusion_bench/method/adamerging/resnet_adamerging.py +279 -0
  6. fusion_bench/method/adamerging/task_wise_adamerging.py +2 -14
  7. fusion_bench/method/adamerging/utils.py +58 -0
  8. fusion_bench/method/classification/image_classification_finetune.py +168 -12
  9. fusion_bench/method/dare/simple_average.py +3 -2
  10. fusion_bench/method/dare/task_arithmetic.py +3 -2
  11. fusion_bench/method/simple_average.py +6 -4
  12. fusion_bench/method/task_arithmetic/task_arithmetic.py +4 -1
  13. fusion_bench/mixins/lightning_fabric.py +9 -0
  14. fusion_bench/modelpool/__init__.py +24 -2
  15. fusion_bench/modelpool/base_pool.py +8 -1
  16. fusion_bench/modelpool/causal_lm/causal_lm.py +2 -1
  17. fusion_bench/modelpool/convnext_for_image_classification.py +198 -0
  18. fusion_bench/modelpool/dinov2_for_image_classification.py +197 -0
  19. fusion_bench/modelpool/resnet_for_image_classification.py +289 -5
  20. fusion_bench/models/hf_clip.py +4 -7
  21. fusion_bench/models/hf_utils.py +4 -1
  22. fusion_bench/models/model_card_templates/default.md +1 -1
  23. fusion_bench/taskpool/__init__.py +2 -0
  24. fusion_bench/taskpool/clip_vision/taskpool.py +1 -1
  25. fusion_bench/taskpool/resnet_for_image_classification.py +231 -0
  26. fusion_bench/utils/json.py +49 -8
  27. fusion_bench/utils/state_dict_arithmetic.py +91 -10
  28. {fusion_bench-0.2.26.dist-info → fusion_bench-0.2.28.dist-info}/METADATA +2 -2
  29. {fusion_bench-0.2.26.dist-info → fusion_bench-0.2.28.dist-info}/RECORD +124 -62
  30. fusion_bench_config/fabric/auto.yaml +1 -1
  31. fusion_bench_config/fabric/loggers/swandb_logger.yaml +5 -0
  32. fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
  33. fusion_bench_config/fabric_model_fusion.yaml +1 -0
  34. fusion_bench_config/method/adamerging/resnet.yaml +18 -0
  35. fusion_bench_config/method/classification/clip_finetune.yaml +5 -0
  36. fusion_bench_config/method/classification/image_classification_finetune.yaml +9 -0
  37. fusion_bench_config/method/linear/expo.yaml +5 -0
  38. fusion_bench_config/method/linear/llama_expo.yaml +5 -0
  39. fusion_bench_config/method/linear/llama_expo_with_dare.yaml +3 -0
  40. fusion_bench_config/method/linear/simple_average_for_causallm.yaml +5 -0
  41. fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +3 -0
  42. fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +5 -0
  43. fusion_bench_config/method/linear/weighted_average_for_llama.yaml +5 -0
  44. fusion_bench_config/method/mixtral_moe_merging.yaml +3 -0
  45. fusion_bench_config/method/mixtral_moe_upscaling.yaml +5 -0
  46. fusion_bench_config/method/regmean/clip_regmean.yaml +3 -0
  47. fusion_bench_config/method/regmean/gpt2_regmean.yaml +3 -0
  48. fusion_bench_config/method/regmean/regmean.yaml +3 -0
  49. fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +3 -0
  50. fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +6 -0
  51. fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
  52. fusion_bench_config/method/smile_upscaling/projected_energy.yaml +5 -0
  53. fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +3 -0
  54. fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -0
  55. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +5 -0
  56. fusion_bench_config/method/wudi/wudi.yaml +3 -0
  57. fusion_bench_config/model_fusion.yaml +2 -1
  58. fusion_bench_config/modelpool/ConvNextForImageClassification/convnext-base-224.yaml +10 -0
  59. fusion_bench_config/modelpool/Dinov2ForImageClassification/dinov2-base-imagenet1k-1-layer.yaml +10 -0
  60. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/_generate_config.py +138 -0
  61. fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar10.yaml +1 -1
  62. fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar100.yaml +1 -1
  63. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_dtd.yaml +14 -0
  64. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_emnist_letters.yaml +14 -0
  65. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_eurosat.yaml +14 -0
  66. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fashion_mnist.yaml +14 -0
  67. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fer2013.yaml +14 -0
  68. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_food101.yaml +14 -0
  69. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_gtsrb.yaml +14 -0
  70. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_kmnist.yaml +14 -0
  71. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_mnist.yaml +14 -0
  72. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford-iiit-pet.yaml +14 -0
  73. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford_flowers102.yaml +14 -0
  74. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_pcam.yaml +14 -0
  75. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_rendered-sst2.yaml +14 -0
  76. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_resisc45.yaml +14 -0
  77. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stanford-cars.yaml +14 -0
  78. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stl10.yaml +14 -0
  79. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_sun397.yaml +14 -0
  80. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_svhn.yaml +14 -0
  81. fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar10.yaml +1 -1
  82. fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar100.yaml +1 -1
  83. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_dtd.yaml +14 -0
  84. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_emnist_letters.yaml +14 -0
  85. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_eurosat.yaml +14 -0
  86. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fashion_mnist.yaml +14 -0
  87. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fer2013.yaml +14 -0
  88. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_food101.yaml +14 -0
  89. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_gtsrb.yaml +14 -0
  90. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_kmnist.yaml +14 -0
  91. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_mnist.yaml +14 -0
  92. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford-iiit-pet.yaml +14 -0
  93. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford_flowers102.yaml +14 -0
  94. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_pcam.yaml +14 -0
  95. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_rendered-sst2.yaml +14 -0
  96. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_resisc45.yaml +14 -0
  97. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stanford-cars.yaml +14 -0
  98. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stl10.yaml +14 -0
  99. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_sun397.yaml +14 -0
  100. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_svhn.yaml +14 -0
  101. fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar10.yaml +1 -1
  102. fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar100.yaml +1 -1
  103. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_dtd.yaml +14 -0
  104. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_emnist_letters.yaml +14 -0
  105. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_eurosat.yaml +14 -0
  106. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fashion_mnist.yaml +14 -0
  107. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fer2013.yaml +14 -0
  108. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_food101.yaml +14 -0
  109. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_gtsrb.yaml +14 -0
  110. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_kmnist.yaml +14 -0
  111. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_mnist.yaml +14 -0
  112. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford-iiit-pet.yaml +14 -0
  113. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford_flowers102.yaml +14 -0
  114. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_pcam.yaml +14 -0
  115. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_rendered-sst2.yaml +14 -0
  116. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_resisc45.yaml +14 -0
  117. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stanford-cars.yaml +14 -0
  118. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stl10.yaml +14 -0
  119. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_sun397.yaml +14 -0
  120. fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_svhn.yaml +14 -0
  121. fusion_bench_config/method/clip_finetune.yaml +0 -26
  122. {fusion_bench-0.2.26.dist-info → fusion_bench-0.2.28.dist-info}/WHEEL +0 -0
  123. {fusion_bench-0.2.26.dist-info → fusion_bench-0.2.28.dist-info}/entry_points.txt +0 -0
  124. {fusion_bench-0.2.26.dist-info → fusion_bench-0.2.28.dist-info}/licenses/LICENSE +0 -0
  125. {fusion_bench-0.2.26.dist-info → fusion_bench-0.2.28.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,9 @@
1
+ """Image Classification Fine-tuning Module.
2
+
3
+ This module provides algorithms for fine-tuning and evaluating image classification models
4
+ using PyTorch Lightning.
5
+ """
6
+
1
7
  import os
2
8
  from typing import Optional
3
9
 
@@ -23,35 +29,93 @@ from fusion_bench import (
23
29
  from fusion_bench.dataset import CLIPDataset
24
30
  from fusion_bench.modelpool import ResNetForImageClassificationPool
25
31
  from fusion_bench.tasks.clip_classification import get_num_classes
32
+ from torch.utils.data import random_split
26
33
 
27
34
  log = get_rankzero_logger(__name__)
28
35
 
29
36
 
37
+ def _get_base_model_name(model) -> Optional[str]:
38
+ if hasattr(model, "config") and hasattr(model.config, "_name_or_path"):
39
+ return model.config._name_or_path
40
+ else:
41
+ return None
42
+
43
+
30
44
  @auto_register_config
31
45
  class ImageClassificationFineTuning(BaseAlgorithm):
46
+ """Fine-tuning algorithm for image classification models.
47
+
48
+ This class implements end-to-end fine-tuning for image classification tasks using PyTorch Lightning.
49
+ It supports both epoch-based and step-based training with configurable optimizers, learning rate
50
+ schedulers, and data loaders.
51
+
52
+ Args:
53
+ max_epochs (Optional[int]): Maximum number of training epochs. Mutually exclusive with max_steps.
54
+ max_steps (Optional[int]): Maximum number of training steps. Mutually exclusive with max_epochs.
55
+ label_smoothing (float): Label smoothing factor for cross-entropy loss (0.0 = no smoothing).
56
+ optimizer (DictConfig): Configuration for the optimizer (e.g., Adam, SGD).
57
+ lr_scheduler (DictConfig): Configuration for the learning rate scheduler.
58
+ dataloader_kwargs (DictConfig): Additional keyword arguments for DataLoader construction.
59
+ **kwargs: Additional arguments passed to the base class.
60
+
61
+ Raises:
62
+ AssertionError: If both max_epochs and max_steps are provided.
63
+
64
+ Example:
65
+ ```python
66
+ >>> config = {
67
+ ... 'max_epochs': 10,
68
+ ... 'max_steps': None,
69
+ ... 'label_smoothing': 0.1,
70
+ ... 'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.001},
71
+ ... 'lr_scheduler': {'_target_': 'torch.optim.lr_scheduler.StepLR', 'step_size': 5},
72
+ ... 'dataloader_kwargs': {'batch_size': 32, 'num_workers': 4}
73
+ ... }
74
+ >>> algorithm = ImageClassificationFineTuning(**config)
75
+ ```
76
+ """
77
+
32
78
  def __init__(
33
79
  self,
34
80
  max_epochs: Optional[int],
35
81
  max_steps: Optional[int],
82
+ training_data_ratio: Optional[float],
36
83
  label_smoothing: float,
37
84
  optimizer: DictConfig,
38
85
  lr_scheduler: DictConfig,
39
86
  dataloader_kwargs: DictConfig,
87
+ save_top_k: int,
88
+ save_interval: int,
89
+ save_on_train_epoch_end: bool,
40
90
  **kwargs,
41
91
  ):
42
92
  super().__init__(**kwargs)
43
- assert (max_epochs is None) or (
93
+ assert (max_epochs is None or max_epochs < 0) or (
44
94
  max_steps is None or max_steps < 0
45
95
  ), "Only one of max_epochs or max_steps should be set."
46
- self.training_interval = "epoch" if max_epochs is not None else "step"
96
+ self.training_interval = (
97
+ "epoch" if max_epochs is not None and max_epochs > 0 else "step"
98
+ )
47
99
  if self.training_interval == "epoch":
48
100
  self.max_steps = -1
49
101
  log.info(f"Training interval: {self.training_interval}")
50
102
  log.info(f"Max epochs: {max_epochs}, max steps: {max_steps}")
51
103
 
52
104
  def run(self, modelpool: ResNetForImageClassificationPool):
105
+ """Execute the fine-tuning process on the provided model pool.
106
+
107
+ This method performs the complete fine-tuning workflow:
108
+ 1. Loads the pretrained model from the model pool
109
+ 2. Prepares training and validation datasets
110
+ 3. Configures optimizer and learning rate scheduler
111
+ 4. Sets up Lightning trainer with appropriate callbacks
112
+ 5. Executes the training process
113
+ 6. Saves the final fine-tuned model
114
+ """
53
115
  # load model and dataset
54
116
  model = modelpool.load_pretrained_or_first_model()
117
+ base_model_name = _get_base_model_name(model)
118
+
55
119
  assert isinstance(model, nn.Module), "Loaded model is not a nn.Module."
56
120
 
57
121
  assert (
@@ -59,7 +123,17 @@ class ImageClassificationFineTuning(BaseAlgorithm):
59
123
  ), "Exactly one training dataset is required."
60
124
  self.dataset_name = dataset_name = modelpool.train_dataset_names[0]
61
125
  num_classes = get_num_classes(dataset_name)
126
+ log.info(f"Number of classes for dataset {dataset_name}: {num_classes}")
62
127
  train_dataset = modelpool.load_train_dataset(dataset_name)
128
+ log.info(f"Training dataset size: {len(train_dataset)}")
129
+ if self.training_data_ratio is not None and 0 < self.training_data_ratio < 1:
130
+ train_dataset, _ = random_split(
131
+ train_dataset,
132
+ lengths=[self.training_data_ratio, 1 - self.training_data_ratio],
133
+ )
134
+ log.info(
135
+ f"Using {len(train_dataset)} samples for training after applying training_data_ratio={self.training_data_ratio}."
136
+ )
63
137
  train_dataset = CLIPDataset(
64
138
  train_dataset, processor=modelpool.load_processor(stage="train")
65
139
  )
@@ -70,6 +144,8 @@ class ImageClassificationFineTuning(BaseAlgorithm):
70
144
  val_dataset, processor=modelpool.load_processor(stage="val")
71
145
  )
72
146
  val_loader = self.get_dataloader(val_dataset, stage="val")
147
+ else:
148
+ val_loader = None
73
149
 
74
150
  # configure optimizer
75
151
  optimizer = instantiate(self.optimizer, params=model.parameters())
@@ -91,7 +167,11 @@ class ImageClassificationFineTuning(BaseAlgorithm):
91
167
  objective=nn.CrossEntropyLoss(label_smoothing=self.label_smoothing),
92
168
  metrics={
93
169
  "acc@1": Accuracy(task="multiclass", num_classes=num_classes),
94
- "acc@5": Accuracy(task="multiclass", num_classes=num_classes, top_k=5),
170
+ f"acc@{min(5,num_classes)}": Accuracy(
171
+ task="multiclass",
172
+ num_classes=num_classes,
173
+ top_k=min(5, num_classes),
174
+ ),
95
175
  },
96
176
  )
97
177
 
@@ -108,11 +188,21 @@ class ImageClassificationFineTuning(BaseAlgorithm):
108
188
  callbacks=[
109
189
  pl_callbacks.LearningRateMonitor(logging_interval="step"),
110
190
  pl_callbacks.DeviceStatsMonitor(),
191
+ pl_callbacks.ModelCheckpoint(
192
+ save_top_k=self.save_top_k,
193
+ every_n_train_steps=(
194
+ self.save_interval if self.training_interval == "step" else None
195
+ ),
196
+ every_n_epochs=(
197
+ self.save_interval
198
+ if self.training_interval == "epoch"
199
+ else None
200
+ ),
201
+ save_on_train_epoch_end=self.save_on_train_epoch_end,
202
+ save_last=True,
203
+ ),
111
204
  ],
112
- logger=TensorBoardLogger(
113
- save_dir=log_dir,
114
- name="",
115
- ),
205
+ logger=TensorBoardLogger(save_dir=log_dir, name="", version=""),
116
206
  fast_dev_run=RuntimeConstants.debug,
117
207
  )
118
208
 
@@ -129,10 +219,27 @@ class ImageClassificationFineTuning(BaseAlgorithm):
129
219
  "raw_checkpoints",
130
220
  "final",
131
221
  ),
222
+ algorithm_config=self.config,
223
+ description=f"Fine-tuned ResNet model on dataset {dataset_name}.",
224
+ base_model=base_model_name,
132
225
  )
133
226
  return model
134
227
 
135
228
  def get_dataloader(self, dataset, stage: str):
229
+ """Create a DataLoader for the specified dataset and training stage.
230
+
231
+ Constructs a PyTorch DataLoader with stage-appropriate configurations:
232
+ - Training stage: shuffling enabled by default
233
+ - Validation/test stages: shuffling disabled by default
234
+
235
+ Args:
236
+ dataset: The dataset to wrap in a DataLoader.
237
+ stage (str): Training stage, must be one of "train", "val", or "test".
238
+ Determines default shuffling behavior.
239
+
240
+ Returns:
241
+ DataLoader: Configured DataLoader for the given dataset and stage.
242
+ """
136
243
  assert stage in ["train", "val", "test"], f"Invalid stage: {stage}"
137
244
  dataloader_kwargs = dict(self.dataloader_kwargs)
138
245
  if "shuffle" not in dataloader_kwargs:
@@ -142,10 +249,42 @@ class ImageClassificationFineTuning(BaseAlgorithm):
142
249
 
143
250
  @auto_register_config
144
251
  class ImageClassificationFineTuning_Test(BaseAlgorithm):
252
+ """Test/evaluation algorithm for fine-tuned image classification models.
253
+
254
+ This class implements model evaluation on test or validation datasets using PyTorch Lightning.
255
+ It can either evaluate a model directly or load a model from a checkpoint before evaluation.
256
+ The evaluation computes standard classification metrics including top-1 and top-5 accuracy.
257
+
258
+ Args:
259
+ checkpoint_path (str): Path to the model checkpoint file. If None, uses the model
260
+ directly from the model pool without loading from checkpoint.
261
+ dataloader_kwargs (DictConfig): Additional keyword arguments for DataLoader construction.
262
+ **kwargs: Additional arguments passed to the base class.
263
+
264
+ Example:
265
+ ```python
266
+ >>> config = {
267
+ ... 'checkpoint_path': '/path/to/model/checkpoint.ckpt',
268
+ ... 'dataloader_kwargs': {'batch_size': 64, 'num_workers': 4}
269
+ ... }
270
+ >>> test_algorithm = ImageClassificationFineTuning_Test(**config)
271
+ ```
272
+ """
273
+
145
274
  def __init__(self, checkpoint_path: str, dataloader_kwargs: DictConfig, **kwargs):
146
275
  super().__init__(**kwargs)
147
276
 
148
- def run(self, modelpool: BaseModelPool):
277
+ def run(self, modelpool: ResNetForImageClassificationPool):
278
+ """Execute model evaluation on the provided model pool's test/validation dataset.
279
+
280
+ This method performs the complete evaluation workflow:
281
+ 1. Loads the model from the model pool (pretrained or first available)
282
+ 2. Prepares the test or validation dataset (prioritizes test if both available)
283
+ 3. Sets up the Lightning module with appropriate metrics (top-1 and top-5 accuracy)
284
+ 4. Loads from checkpoint if specified, otherwise uses the model directly
285
+ 5. Executes the evaluation using Lightning trainer
286
+ 6. Logs and returns the test metrics
287
+ """
149
288
  assert (
150
289
  modelpool.has_val_dataset or modelpool.has_test_dataset
151
290
  ), "No validation or test dataset found in the model pool."
@@ -181,8 +320,10 @@ class ImageClassificationFineTuning_Test(BaseAlgorithm):
181
320
  model,
182
321
  metrics={
183
322
  "acc@1": Accuracy(task="multiclass", num_classes=num_classes),
184
- "acc@5": Accuracy(
185
- task="multiclass", num_classes=num_classes, top_k=5
323
+ f"acc@{min(5,num_classes)}": Accuracy(
324
+ task="multiclass",
325
+ num_classes=num_classes,
326
+ top_k=min(5, num_classes),
186
327
  ),
187
328
  },
188
329
  )
@@ -192,8 +333,10 @@ class ImageClassificationFineTuning_Test(BaseAlgorithm):
192
333
  model=model,
193
334
  metrics={
194
335
  "acc@1": Accuracy(task="multiclass", num_classes=num_classes),
195
- "acc@5": Accuracy(
196
- task="multiclass", num_classes=num_classes, top_k=5
336
+ f"acc@{min(5,num_classes)}": Accuracy(
337
+ task="multiclass",
338
+ num_classes=num_classes,
339
+ top_k=min(5, num_classes),
197
340
  ),
198
341
  },
199
342
  )
@@ -207,6 +350,19 @@ class ImageClassificationFineTuning_Test(BaseAlgorithm):
207
350
  return model
208
351
 
209
352
  def get_dataloader(self, dataset, stage: str):
353
+ """Create a DataLoader for the specified dataset and evaluation stage.
354
+
355
+ Constructs a PyTorch DataLoader with stage-appropriate configurations for evaluation.
356
+ Similar to the training version but typically used for test/validation datasets.
357
+
358
+ Args:
359
+ dataset: The dataset to wrap in a DataLoader.
360
+ stage (str): Evaluation stage, must be one of "train", "val", or "test".
361
+ Determines default shuffling behavior (disabled for non-train stages).
362
+
363
+ Returns:
364
+ DataLoader: Configured DataLoader for the given dataset and stage.
365
+ """
210
366
  assert stage in ["train", "val", "test"], f"Invalid stage: {stage}"
211
367
  dataloader_kwargs = dict(self.dataloader_kwargs)
212
368
  if "shuffle" not in dataloader_kwargs:
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
 
3
- from fusion_bench import BaseAlgorithm, BaseModelPool
3
+ from fusion_bench import BaseAlgorithm, BaseModelPool, auto_register_config
4
4
  from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_mul
5
5
 
6
6
  from .task_arithmetic import DareTaskArithmetic
@@ -8,6 +8,7 @@ from .task_arithmetic import DareTaskArithmetic
8
8
  log = logging.getLogger(__name__)
9
9
 
10
10
 
11
+ @auto_register_config
11
12
  class DareSimpleAverage(BaseAlgorithm):
12
13
 
13
14
  def __init__(
@@ -17,10 +18,10 @@ class DareSimpleAverage(BaseAlgorithm):
17
18
  rescale: bool = True,
18
19
  **kwargs,
19
20
  ):
21
+ super().__init__(**kwargs)
20
22
  self.sparsity_ratio = sparsity_ratio
21
23
  self.only_on_linear_weight = only_on_linear_weights
22
24
  self.rescale = rescale
23
- super().__init__(**kwargs)
24
25
 
25
26
  def run(self, modelpool: BaseModelPool):
26
27
  return DareTaskArithmetic(
@@ -1,7 +1,7 @@
1
1
  import torch
2
2
  from torch import Tensor, nn
3
3
 
4
- from fusion_bench import BaseAlgorithm, BaseModelPool
4
+ from fusion_bench import BaseAlgorithm, BaseModelPool, auto_register_config
5
5
  from fusion_bench.utils.state_dict_arithmetic import state_dict_sum
6
6
 
7
7
  from .utils import (
@@ -12,6 +12,7 @@ from .utils import (
12
12
  )
13
13
 
14
14
 
15
+ @auto_register_config
15
16
  class DareTaskArithmetic(BaseAlgorithm):
16
17
  """
17
18
  Implementation of Task Arithmetic w/ DARE.
@@ -27,11 +28,11 @@ class DareTaskArithmetic(BaseAlgorithm):
27
28
  rescale: bool = True,
28
29
  **kwargs,
29
30
  ):
31
+ super().__init__(**kwargs)
30
32
  self.scaling_factor = scaling_factor
31
33
  self.sparsity_ratio = sparsity_ratio
32
34
  self.only_on_linear_weights = only_on_linear_weights
33
35
  self.rescale = rescale
34
- super().__init__(**kwargs)
35
36
 
36
37
  def _load_task_vector(
37
38
  self,
@@ -64,10 +64,12 @@ class SimpleAverageAlgorithm(
64
64
  SimpleProfilerMixin,
65
65
  BaseAlgorithm,
66
66
  ):
67
- def __init__(self, show_pbar: bool = False, **kwargs):
67
+ def __init__(self, show_pbar: bool = False, inplace: bool = True, **kwargs):
68
68
  """
69
69
  Args:
70
70
  show_pbar (bool): If True, shows a progress bar during model loading and merging. Default is False.
71
+ inplace (bool): If True, overwrites the weights of the first model in the model pool.
72
+ If False, creates a new model for the merged weights. Default is True.
71
73
  """
72
74
  super().__init__(**kwargs)
73
75
 
@@ -104,12 +106,12 @@ class SimpleAverageAlgorithm(
104
106
  with self.profile("merge weights"):
105
107
  if sd is None:
106
108
  # Initialize the state dictionary with the first model's state dictionary
107
- sd = model.state_dict(keep_vars=True)
108
- forward_model = model
109
+ sd = model.state_dict()
110
+ forward_model = model if self.inplace else deepcopy(model)
109
111
  else:
110
112
  # Add the current model's state dictionary to the accumulated state dictionary
111
113
  sd = state_dict_add(
112
- sd, model.state_dict(keep_vars=True), show_pbar=self.show_pbar
114
+ sd, model.state_dict(), show_pbar=self.show_pbar
113
115
  )
114
116
  with self.profile("merge weights"):
115
117
  # Divide the accumulated state dictionary by the number of models to get the average
@@ -149,7 +149,10 @@ class TaskArithmeticAlgorithm(
149
149
  )
150
150
  with self.profile("merge weights"):
151
151
  # scale the task vector
152
- task_vector = state_dict_mul(task_vector, self.config.scaling_factor)
152
+ # here we keep the dtype when the elements of value are all zeros to avoid dtype mismatch
153
+ task_vector = state_dict_mul(
154
+ task_vector, self.config.scaling_factor, keep_dtype_when_zero=True
155
+ )
153
156
  # add the task vector to the pretrained model
154
157
  state_dict = state_dict_add(pretrained_model.state_dict(), task_vector)
155
158
 
@@ -111,6 +111,15 @@ class LightningFabricMixin:
111
111
  """
112
112
  if self.fabric is not None and len(self.fabric._loggers) > 0:
113
113
  log_dir = self.fabric.logger.log_dir
114
+
115
+ # Special handling for SwanLabLogger to get the correct log directory
116
+ if (
117
+ log_dir is None
118
+ and self.fabric.logger.__class__.__name__ == "SwanLabLogger"
119
+ ):
120
+ log_dir = self.fabric.logger.save_dir or self.fabric.logger._logdir
121
+
122
+ assert log_dir is not None, "log_dir should not be None"
114
123
  if self.fabric.is_global_zero and not os.path.exists(log_dir):
115
124
  os.makedirs(log_dir, exist_ok=True)
116
125
  return log_dir
@@ -8,6 +8,14 @@ _import_structure = {
8
8
  "base_pool": ["BaseModelPool"],
9
9
  "causal_lm": ["CausalLMPool", "CausalLMBackbonePool"],
10
10
  "clip_vision": ["CLIPVisionModelPool"],
11
+ "convnext_for_image_classification": [
12
+ "ConvNextForImageClassificationPool",
13
+ "load_transformers_convnext",
14
+ ],
15
+ "dinov2_for_image_classification": [
16
+ "Dinov2ForImageClassificationPool",
17
+ "load_transformers_dinov2",
18
+ ],
11
19
  "nyuv2_modelpool": ["NYUv2ModelPool"],
12
20
  "huggingface_automodel": ["AutoModelPool"],
13
21
  "seq2seq_lm": ["Seq2SeqLMPool"],
@@ -18,7 +26,10 @@ _import_structure = {
18
26
  "GPT2ForSequenceClassificationPool",
19
27
  ],
20
28
  "seq_classification_lm": ["SequenceClassificationModelPool"],
21
- "resnet_for_image_classification": ["ResNetForImageClassificationPool"],
29
+ "resnet_for_image_classification": [
30
+ "ResNetForImageClassificationPool",
31
+ "load_transformers_resnet",
32
+ ],
22
33
  }
23
34
 
24
35
 
@@ -26,6 +37,14 @@ if TYPE_CHECKING:
26
37
  from .base_pool import BaseModelPool
27
38
  from .causal_lm import CausalLMBackbonePool, CausalLMPool
28
39
  from .clip_vision import CLIPVisionModelPool
40
+ from .convnext_for_image_classification import (
41
+ ConvNextForImageClassificationPool,
42
+ load_transformers_convnext,
43
+ )
44
+ from .dinov2_for_image_classification import (
45
+ Dinov2ForImageClassificationPool,
46
+ load_transformers_dinov2,
47
+ )
29
48
  from .huggingface_automodel import AutoModelPool
30
49
  from .huggingface_gpt2_classification import (
31
50
  GPT2ForSequenceClassificationPool,
@@ -34,7 +53,10 @@ if TYPE_CHECKING:
34
53
  from .nyuv2_modelpool import NYUv2ModelPool
35
54
  from .openclip_vision import OpenCLIPVisionModelPool
36
55
  from .PeftModelForSeq2SeqLM import PeftModelForSeq2SeqLMPool
37
- from .resnet_for_image_classification import ResNetForImageClassificationPool
56
+ from .resnet_for_image_classification import (
57
+ ResNetForImageClassificationPool,
58
+ load_transformers_resnet,
59
+ )
38
60
  from .seq2seq_lm import Seq2SeqLMPool
39
61
  from .seq_classification_lm import SequenceClassificationModelPool
40
62
 
@@ -3,7 +3,7 @@ from copy import deepcopy
3
3
  from typing import Dict, Generator, List, Optional, Tuple, Union
4
4
 
5
5
  import torch
6
- from omegaconf import DictConfig
6
+ from omegaconf import DictConfig, OmegaConf, UnsupportedValueType
7
7
  from torch import nn
8
8
  from torch.utils.data import Dataset
9
9
 
@@ -52,6 +52,13 @@ class BaseModelPool(
52
52
  ):
53
53
  if isinstance(models, List):
54
54
  models = {str(model_idx): model for model_idx, model in enumerate(models)}
55
+
56
+ if isinstance(models, dict):
57
+ try: # try to convert to DictConfig
58
+ models = OmegaConf.create(models)
59
+ except UnsupportedValueType:
60
+ pass
61
+
55
62
  self._models = models
56
63
  self._train_datasets = train_datasets
57
64
  self._val_datasets = val_datasets
@@ -8,6 +8,7 @@ from copy import deepcopy
8
8
  from typing import Any, Dict, Optional, TypeAlias, Union, cast # noqa: F401
9
9
 
10
10
  import peft
11
+ from lightning_utilities.core.rank_zero import rank_zero_only
11
12
  from omegaconf import DictConfig, OmegaConf, flag_override
12
13
  from torch import nn
13
14
  from torch.nn.modules import Module
@@ -342,7 +343,7 @@ class CausalLMPool(BaseModelPool):
342
343
  )
343
344
 
344
345
  # Create and save model card if algorithm_config is provided
345
- if algorithm_config is not None:
346
+ if algorithm_config is not None and rank_zero_only.rank == 0:
346
347
  if description is None:
347
348
  description = "Model created using FusionBench."
348
349
  model_card_str = create_default_model_card(