fusion-bench 0.2.25__py3-none-any.whl → 0.2.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/dataset/clip_dataset.py +1 -0
- fusion_bench/method/__init__.py +4 -0
- fusion_bench/method/adamerging/__init__.py +28 -5
- fusion_bench/method/adamerging/resnet_adamerging.py +279 -0
- fusion_bench/method/adamerging/task_wise_adamerging.py +2 -14
- fusion_bench/method/adamerging/utils.py +58 -0
- fusion_bench/method/classification/clip_finetune.py +6 -4
- fusion_bench/method/classification/image_classification_finetune.py +156 -12
- fusion_bench/method/dare/simple_average.py +3 -2
- fusion_bench/method/dare/task_arithmetic.py +3 -2
- fusion_bench/method/dop/__init__.py +1 -0
- fusion_bench/method/dop/dop.py +366 -0
- fusion_bench/method/dop/min_norm_solvers.py +227 -0
- fusion_bench/method/dop/utils.py +73 -0
- fusion_bench/method/simple_average.py +6 -4
- fusion_bench/mixins/lightning_fabric.py +9 -0
- fusion_bench/modelpool/causal_lm/causal_lm.py +2 -1
- fusion_bench/modelpool/resnet_for_image_classification.py +285 -4
- fusion_bench/models/hf_clip.py +4 -7
- fusion_bench/models/hf_utils.py +4 -1
- fusion_bench/taskpool/__init__.py +2 -0
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -1
- fusion_bench/taskpool/resnet_for_image_classification.py +231 -0
- fusion_bench/utils/state_dict_arithmetic.py +91 -10
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/METADATA +9 -3
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/RECORD +140 -77
- fusion_bench_config/fabric/auto.yaml +1 -1
- fusion_bench_config/fabric/loggers/swandb_logger.yaml +5 -0
- fusion_bench_config/fabric/loggers/wandb_logger.yaml +2 -0
- fusion_bench_config/fabric_model_fusion.yaml +1 -0
- fusion_bench_config/method/adamerging/resnet.yaml +18 -0
- fusion_bench_config/method/bitdelta/bitdelta.yaml +3 -0
- fusion_bench_config/method/classification/clip_finetune.yaml +5 -0
- fusion_bench_config/method/classification/image_classification_finetune.yaml +9 -0
- fusion_bench_config/method/depth_upscaling.yaml +9 -0
- fusion_bench_config/method/dop/dop.yaml +30 -0
- fusion_bench_config/method/dummy.yaml +6 -0
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +6 -0
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +8 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +8 -0
- fusion_bench_config/method/linear/expo.yaml +5 -0
- fusion_bench_config/method/linear/linear_interpolation.yaml +8 -0
- fusion_bench_config/method/linear/llama_expo.yaml +5 -0
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +3 -0
- fusion_bench_config/method/linear/simple_average_for_causallm.yaml +5 -0
- fusion_bench_config/method/linear/task_arithmetic_for_causallm.yaml +3 -0
- fusion_bench_config/method/linear/ties_merging_for_causallm.yaml +5 -0
- fusion_bench_config/method/linear/weighted_average.yaml +3 -0
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +6 -1
- fusion_bench_config/method/mixtral_moe_merging.yaml +3 -0
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +5 -0
- fusion_bench_config/method/model_recombination.yaml +8 -0
- fusion_bench_config/method/model_stock/model_stock.yaml +4 -1
- fusion_bench_config/method/opcm/opcm.yaml +5 -0
- fusion_bench_config/method/opcm/task_arithmetic.yaml +6 -0
- fusion_bench_config/method/opcm/ties_merging.yaml +5 -0
- fusion_bench_config/method/opcm/weight_average.yaml +5 -0
- fusion_bench_config/method/regmean/clip_regmean.yaml +3 -0
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +3 -0
- fusion_bench_config/method/regmean/regmean.yaml +3 -0
- fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +3 -0
- fusion_bench_config/method/simple_average.yaml +9 -0
- fusion_bench_config/method/slerp/slerp.yaml +9 -0
- fusion_bench_config/method/slerp/slerp_lm.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/causal_lm_upscaling.yaml +6 -0
- fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/projected_energy.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/singular_projection_merging.yaml +3 -0
- fusion_bench_config/method/smile_upscaling/smile_mistral_upscaling.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +5 -0
- fusion_bench_config/method/smile_upscaling/smile_upscaling.yaml +3 -0
- fusion_bench_config/method/task_arithmetic.yaml +9 -0
- fusion_bench_config/method/ties_merging.yaml +3 -0
- fusion_bench_config/method/wudi/wudi.yaml +3 -0
- fusion_bench_config/model_fusion.yaml +2 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/_generate_config.py +138 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet152_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet152_svhn.yaml +14 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet18_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet18_svhn.yaml +14 -0
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar10.yaml +1 -1
- fusion_bench_config/modelpool/{ResNetForImageClassfication → ResNetForImageClassification}/transformers/resnet50_cifar100.yaml +1 -1
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_dtd.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_emnist_letters.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_eurosat.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fashion_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_fer2013.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_food101.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_gtsrb.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_kmnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_mnist.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford-iiit-pet.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_oxford_flowers102.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_pcam.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_rendered-sst2.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_resisc45.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stanford-cars.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_stl10.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_sun397.yaml +14 -0
- fusion_bench_config/modelpool/ResNetForImageClassification/transformers/resnet50_svhn.yaml +14 -0
- fusion_bench_config/method/clip_finetune.yaml +0 -26
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.25.dist-info → fusion_bench-0.2.27.dist-info}/top_level.txt +0 -0
fusion_bench/modelpool/resnet_for_image_classification.py
CHANGED

@@ -1,3 +1,34 @@
+"""ResNet Model Pool for Image Classification.
+
+This module provides a flexible model pool implementation for ResNet models used in image
+classification tasks. It supports both torchvision and transformers implementations of ResNet
+architectures with configurable preprocessing, loading, and saving capabilities.
+
+Example Usage:
+    Create a pool with a torchvision ResNet model:
+
+    ```python
+    >>> # Torchvision ResNet pool
+    >>> pool = ResNetForImageClassificationPool(
+    ...     type="torchvision",
+    ...     models={"resnet18_cifar10": {"model_name": "resnet18", "dataset_name": "cifar10"}}
+    ... )
+    >>> model = pool.load_model("resnet18_cifar10")
+    >>> processor = pool.load_processor(stage="train")
+    ```
+
+    Create a pool with a transformers ResNet model:
+
+    ```python
+    >>> # Transformers ResNet pool
+    >>> pool = ResNetForImageClassificationPool(
+    ...     type="transformers",
+    ...     models={"resnet_model": {"config_path": "microsoft/resnet-50", "pretrained": True}}
+    ... )
+    ```
+"""
+
+import os
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -11,6 +42,7 @@ from typing import (
 )
 
 import torch
+from lightning_utilities.core.rank_zero import rank_zero_only
 from omegaconf import DictConfig
 from torch import nn
 
@@ -26,6 +58,31 @@ log = get_rankzero_logger(__name__)
 def load_torchvision_resnet(
     model_name: str, weights: Optional[str], num_classes: Optional[int]
 ) -> "TorchVisionResNet":
+    """Load a ResNet model from torchvision with optional custom classifier head.
+
+    This function creates a ResNet model using torchvision's model zoo and optionally
+    replaces the final classification layer to match the required number of classes.
+
+    Args:
+        model_name (str): Name of the ResNet model to load (e.g., 'resnet18', 'resnet50').
+            Must be a valid torchvision model name.
+        weights (Optional[str]): Pretrained weights to load. Can be 'DEFAULT', 'IMAGENET1K_V1',
+            or None for random initialization. See torchvision documentation for available options.
+        num_classes (Optional[int]): Number of output classes. If provided, replaces the final
+            fully connected layer. If None, keeps the original classifier (typically 1000 classes).
+
+    Returns:
+        TorchVisionResNet: The loaded ResNet model with appropriate classifier head.
+
+    Raises:
+        AttributeError: If model_name is not a valid torchvision model.
+
+    Example:
+        ```python
+        >>> model = load_torchvision_resnet("resnet18", "DEFAULT", 10)  # CIFAR-10
+        >>> model = load_torchvision_resnet("resnet50", None, 100)  # Random init, 100 classes
+        ```
+    """
     import torchvision.models
 
     model_fn = getattr(torchvision.models, model_name)
@@ -40,6 +97,31 @@ def load_torchvision_resnet(
 def load_transformers_resnet(
     config_path: str, pretrained: bool, dataset_name: Optional[str]
 ):
+    """Load a ResNet model from transformers with optional dataset-specific adaptation.
+
+    This function creates a ResNet model using the transformers library and optionally
+    adapts it for a specific dataset by updating the classifier head and label mappings.
+
+    Args:
+        config_path (str): Path or identifier for the model configuration. Can be a local path
+            or a Hugging Face model identifier (e.g., 'microsoft/resnet-50').
+        pretrained (bool): Whether to load pretrained weights. If True, loads from the
+            specified config_path. If False, initializes with random weights using the config.
+        dataset_name (Optional[str]): Name of the target dataset for adaptation. If provided,
+            updates the model's classifier and label mappings to match the dataset's classes.
+            If None, keeps the original model configuration.
+
+    Returns:
+        ResNetForImageClassification: The loaded and optionally adapted ResNet model.
+
+    Example:
+        ```python
+        >>> # Load pretrained model adapted for CIFAR-10
+        >>> model = load_transformers_resnet("microsoft/resnet-50", True, "cifar10")
+        >>> # Load random initialized model with default classes
+        >>> model = load_transformers_resnet("microsoft/resnet-50", False, None)
+        ```
+    """
     from transformers import AutoConfig, ResNetForImageClassification
 
     if pretrained:
@@ -70,13 +152,107 @@ def load_transformers_resnet(
 
 @auto_register_config
 class ResNetForImageClassificationPool(BaseModelPool):
-
-
-
+    """Model pool for ResNet-based image classification models.
+
+    This class provides a unified interface for managing ResNet models from different sources
+    (torchvision and transformers) with automatic preprocessing, loading, and saving capabilities.
+    It supports multiple ResNet architectures and can automatically adapt models to different
+    datasets by adjusting the number of output classes.
+
+    The pool supports two main types:
+    - "torchvision": Uses torchvision's ResNet implementations with standard ImageNet preprocessing
+    - "transformers": Uses Hugging Face transformers' ResNetForImageClassification with auto processors
+
+    Args:
+        type (str): Model source type, must be either "torchvision" or "transformers".
+        **kwargs: Additional arguments passed to the base BaseModelPool class.
+
+    Attributes:
+        type (str): The model source type specified during initialization.
+
+    Raises:
+        AssertionError: If type is not "torchvision" or "transformers".
+
+    Example:
+        Create a pool with a torchvision ResNet model:
+
+        ```python
+        >>> # Torchvision-based pool
+        >>> pool = ResNetForImageClassificationPool(
+        ...     type="torchvision",
+        ...     models={
+        ...         "resnet18_cifar10": {
+        ...             "model_name": "resnet18",
+        ...             "weights": "DEFAULT",
+        ...             "dataset_name": "cifar10"
+        ...         }
+        ...     }
+        ... )
+        ```
+
+        Create a pool with a transformers ResNet model:
+
+        ```python
+        >>> # Transformers-based pool
+        >>> pool = ResNetForImageClassificationPool(
+        ...     type="transformers",
+        ...     models={
+        ...         "resnet_model": {
+        ...             "config_path": "microsoft/resnet-50",
+        ...             "pretrained": True,
+        ...             "dataset_name": "imagenet"
+        ...         }
+        ...     }
+        ... )
+        ```
+    """
+
+    def __init__(self, models, type: str, **kwargs):
+        super().__init__(models=models, **kwargs)
+        assert type in [
+            "torchvision",
+            "transformers",
+        ], "type must be either 'torchvision' or 'transformers'"
 
     def load_processor(
         self, stage: Literal["train", "val", "test"] = "test", *args, **kwargs
     ):
+        """Load the appropriate image processor/transform for the specified training stage.
+
+        Creates stage-specific image preprocessing pipelines optimized for the model type:
+
+        For torchvision models:
+        - Train stage: Includes data augmentation (random resize crop, horizontal flip)
+        - Val/test stages: Standard preprocessing (resize, center crop) without augmentation
+        - All stages: Apply ImageNet normalization (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+
+        For transformers models:
+        - Uses AutoImageProcessor from the pretrained model configuration
+        - Automatically handles model-specific preprocessing requirements
+
+        Args:
+            stage (Literal["train", "val", "test"]): The training stage determining preprocessing type.
+                - "train": Applies data augmentation for training
+                - "val"/"test": Uses standard preprocessing for evaluation
+            *args: Additional positional arguments (unused).
+            **kwargs: Additional keyword arguments (unused).
+
+        Returns:
+            Union[transforms.Compose, AutoImageProcessor]: The image processor/transform pipeline
+                appropriate for the specified stage and model type.
+
+        Raises:
+            ValueError: If no valid config_path can be found for transformers models.
+
+        Example:
+            ```python
+            >>> # Get training transforms for torchvision model
+            >>> train_transform = pool.load_processor(stage="train")
+            >>> # Get evaluation processor for transformers model
+            >>> eval_processor = pool.load_processor(stage="test")
+            ```
+        """
         if self.type == "torchvision":
             from torchvision import transforms
 
@@ -122,6 +298,58 @@ class ResNetForImageClassificationPool(BaseModelPool):
 
     @override
     def load_model(self, model_name_or_config: Union[str, DictConfig], *args, **kwargs):
+        """Load a ResNet model based on the provided configuration or model name.
+
+        This method supports flexible model loading from different sources and configurations:
+        - Direct model names (e.g., "resnet18", "resnet50") for standard architectures
+        - Model pool keys that map to configurations
+        - Dictionary/DictConfig objects with detailed model specifications
+        - Hugging Face model identifiers for transformers models
+
+        For torchvision models, supports:
+        - Standard ResNet architectures: resnet18, resnet34, resnet50, resnet101, resnet152
+        - Custom configurations with model_name, weights, and num_classes specifications
+        - Automatic dataset adaptation with class number inference
+
+        For transformers models:
+        - Loading from Hugging Face Hub or local paths
+        - Pretrained or randomly initialized models
+        - Automatic logits extraction by overriding forward method
+        - Dataset-specific label mapping configuration
+
+        Args:
+            model_name_or_config (Union[str, DictConfig]): Model specification that can be:
+                - A string model name (e.g., "resnet18") for standard architectures
+                - A model pool key referencing a stored configuration
+                - A dict/DictConfig with model parameters like:
+                    * For torchvision: {"model_name": "resnet18", "weights": "DEFAULT", "num_classes": 10}
+                    * For transformers: {"config_path": "microsoft/resnet-50", "pretrained": True, "dataset_name": "cifar10"}
+            *args: Additional positional arguments (unused).
+            **kwargs: Additional keyword arguments (unused).
+
+        Returns:
+            Union[TorchVisionResNet, ResNetForImageClassification]: The loaded ResNet model
+                configured for the specified task. For transformers models, the forward method
+                is modified to return logits directly instead of the full model output.
+
+        Raises:
+            ValueError: If model_name_or_config type is invalid or if model type is unknown.
+            AssertionError: If num_classes from dataset doesn't match explicit num_classes specification.
+
+        Example:
+            ```python
+            >>> # Load standard torchvision model
+            >>> model = pool.load_model("resnet18")
+
+            >>> # Load with custom configuration
+            >>> config = {"model_name": "resnet50", "weights": "DEFAULT", "dataset_name": "cifar10"}
+            >>> model = pool.load_model(config)

+            >>> # Load transformers model
+            >>> config = {"config_path": "microsoft/resnet-50", "pretrained": True}
+            >>> model = pool.load_model(config)
+            ```
+        """
         log.debug(f"Loading model: {model_name_or_config}", stacklevel=2)
         if (
             isinstance(model_name_or_config, str)
@@ -198,11 +426,64 @@ class ResNetForImageClassificationPool(BaseModelPool):
         return model
 
     @override
-    def save_model(
+    def save_model(
+        self,
+        model,
+        path,
+        algorithm_config: Optional[DictConfig] = None,
+        description: Optional[str] = None,
+        *args,
+        **kwargs,
+    ):
+        """Save a ResNet model to the specified path using the appropriate format.
+
+        This method handles model saving based on the model pool type:
+        - For torchvision models: Saves only the state_dict using torch.save()
+        - For transformers models: Saves the complete model and processor using save_pretrained()
+
+        The saving format ensures compatibility with the corresponding loading mechanisms
+        and preserves all necessary components for model restoration.
+
+        Args:
+            model: The ResNet model to save. Should be compatible with the pool's model type.
+            path (str): Destination path for saving the model. For torchvision models, this
+                should be a file path (e.g., "model.pth"). For transformers models, this
+                should be a directory path where model files will be stored.
+            *args: Additional positional arguments (unused).
+            **kwargs: Additional keyword arguments (unused).
+
+        Raises:
+            ValueError: If the model type is unknown or unsupported.
+
+        Note:
+            For transformers models, both the model weights and the associated image processor
+            are saved to ensure complete reproducibility of the preprocessing pipeline.
+
+        Example:
+            ```python
+            >>> # Save torchvision model
+            >>> pool.save_model(model, "checkpoints/resnet18_cifar10.pth")
+
+            >>> # Save transformers model (saves to directory)
+            >>> pool.save_model(model, "checkpoints/resnet50_model/")
+            ```
+        """
         if self.type == "torchvision":
+            os.makedirs(os.path.dirname(path), exist_ok=True)
             torch.save(model.state_dict(), path)
         elif self.type == "transformers":
             model.save_pretrained(path)
             self.load_processor().save_pretrained(path)
+
+            if algorithm_config is not None and rank_zero_only.rank == 0:
+                from fusion_bench.models.hf_utils import create_default_model_card
+
+                model_card_str = create_default_model_card(
+                    algorithm_config=algorithm_config,
+                    description=description,
+                    modelpool_config=self.config,
+                )
+                with open(os.path.join(path, "README.md"), "w") as f:
+                    f.write(model_card_str)
         else:
             raise ValueError(f"Unknown model type: {self.type}")
fusion_bench/models/hf_clip.py
CHANGED

@@ -1,5 +1,5 @@
 import logging
-from typing import TYPE_CHECKING, Callable, Iterable, List  # noqa: F401
+from typing import TYPE_CHECKING, Callable, Iterable, List, Optional  # noqa: F401
 
 import torch
 from torch import Tensor, nn
@@ -39,7 +39,6 @@ class HFCLIPClassifier(nn.Module):
         self,
         clip_model: CLIPModel,
         processor: CLIPProcessor,
-        extra_module=None,
     ):
         """
         Initialize the HFCLIPClassifier.
@@ -63,8 +62,6 @@ class HFCLIPClassifier(nn.Module):
             persistent=False,
         )
 
-        self.extra_module = extra_module
-
     @property
     def text_model(self):
         """Get the text model component of CLIP."""
@@ -123,9 +120,9 @@ class HFCLIPClassifier(nn.Module):
     def forward(
         self,
         images: Tensor,
-        return_image_embeds=False,
-        return_dict=False,
-        task_name=None,
+        return_image_embeds: bool = False,
+        return_dict: bool = False,
+        task_name: Optional[str] = None,
     ):
         """
         Perform forward pass for zero-shot image classification.
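The `HFCLIPClassifier` changes are API cleanups: the unused `extra_module` constructor argument is removed, and `forward`'s keyword arguments gain type annotations (hence the new `Optional` import). A sketch of the updated construction follows, assuming the classifier's existing zero-shot task setup helper `set_classification_task`, which is not part of this diff; the class names and dummy batch are illustrative.

```python
import torch
from transformers import CLIPModel, CLIPProcessor

from fusion_bench.models.hf_clip import HFCLIPClassifier

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
classifier = HFCLIPClassifier(clip_model, processor)  # extra_module no longer accepted

# Assumed helper (pre-existing, not changed here): builds the zero-shot
# classification weights from class names before forward can be called.
classifier.set_classification_task(["cat", "dog"])

# forward now has typed keywords: return_image_embeds: bool, return_dict: bool,
# task_name: Optional[str].
images = torch.randn(2, 3, 224, 224)  # dummy preprocessed batch
logits = classifier(images, return_image_embeds=False, return_dict=False)
```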
fusion_bench/models/hf_utils.py
CHANGED

@@ -142,7 +142,7 @@ def save_pretrained_with_remote_code(
 
 
 def create_default_model_card(
-    models: list[str],
+    models: Optional[list[str]] = None,
     base_model: Optional[str] = None,
     title: str = "Deep Model Fusion",
     tags: list[str] = ["fusion-bench", "merge"],
@@ -152,6 +152,9 @@ def create_default_model_card(
 ):
     from jinja2 import Template
 
+    if models is None:
+        models = []
+
     template: Template = Template(load_model_card_template("default.md"))
     card = template.render(
         base_model=base_model,
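Making `models` optional keeps `create_default_model_card` usable from call sites that only have an algorithm config, such as the new `save_model` path above. A minimal sketch, with illustrative config values (the keyword names match the call in `save_model`):

```python
from omegaconf import DictConfig

from fusion_bench.models.hf_utils import create_default_model_card

# models now defaults to None and is normalized to [], so a card can be
# rendered purely from the algorithm/modelpool configs.
card = create_default_model_card(
    algorithm_config=DictConfig({"name": "simple_average"}),  # illustrative
    description="Example model card.",
    modelpool_config=DictConfig({"type": "transformers"}),  # illustrative
)
print(card.splitlines()[0])
```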
fusion_bench/taskpool/__init__.py
CHANGED

@@ -18,6 +18,7 @@ _import_structure = {
     "lm_eval_harness": ["LMEvalHarnessTaskPool"],
     "nyuv2_taskpool": ["NYUv2TaskPool"],
     "openclip_vision": ["OpenCLIPVisionModelTaskPool"],
+    "resnet_for_image_classification": ["ResNetForImageClassificationTaskPool"],
 }
 
 
@@ -34,6 +35,7 @@ if TYPE_CHECKING:
     from .lm_eval_harness import LMEvalHarnessTaskPool
     from .nyuv2_taskpool import NYUv2TaskPool
     from .openclip_vision import OpenCLIPVisionModelTaskPool
+    from .resnet_for_image_classification import ResNetForImageClassificationTaskPool
 
 else:
     sys.modules[__name__] = LazyImporter(
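With the entry registered in `_import_structure`, the new task pool resolves through the package's `LazyImporter`, so it is importable from the subpackage root without eagerly importing its heavy dependencies:

```python
from fusion_bench.taskpool import ResNetForImageClassificationTaskPool
```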
fusion_bench/taskpool/resnet_for_image_classification.py
ADDED

@@ -0,0 +1,231 @@
+import itertools
+import json
+import os
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Literal,
+    Optional,
+    TypeVar,
+    Union,
+    override,
+)
+
+import lightning as L
+import torch
+from lightning_utilities.core.rank_zero import rank_zero_only
+from omegaconf import DictConfig
+from torch import Tensor, nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+from torchmetrics import Accuracy, MeanMetric
+from tqdm.auto import tqdm
+
+from fusion_bench import (
+    BaseTaskPool,
+    LightningFabricMixin,
+    RuntimeConstants,
+    auto_register_config,
+    get_rankzero_logger,
+)
+from fusion_bench.dataset import CLIPDataset
+from fusion_bench.modelpool.resnet_for_image_classification import (
+    ResNetForImageClassificationPool,
+    load_torchvision_resnet,
+    load_transformers_resnet,
+)
+from fusion_bench.tasks.clip_classification import get_classnames, get_num_classes
+from fusion_bench.utils import count_parameters
+
+if TYPE_CHECKING:
+    from torchvision.models import ResNet as TorchVisionResNet
+    from transformers import ResNetForImageClassification
+
+log = get_rankzero_logger(__name__)
+
+__all__ = ["ResNetForImageClassificationTaskPool"]
+
+
+@auto_register_config
+class ResNetForImageClassificationTaskPool(
+    BaseTaskPool,
+    LightningFabricMixin,
+    ResNetForImageClassificationPool,
+):
+
+    _is_setup = False
+
+    def __init__(
+        self,
+        type: str,
+        test_datasets: DictConfig,
+        dataloader_kwargs: DictConfig,
+        processor_config_path: str,
+        **kwargs,
+    ):
+        if type == "transformers":
+            super().__init__(
+                models=DictConfig(
+                    {"_pretrained_": {"config_path": processor_config_path}}
+                ),
+                type=type,
+                test_datasets=test_datasets,
+                **kwargs,
+            )
+        elif type == "torchvision":
+            super().__init__(type=type, test_datasets=test_datasets, **kwargs)
+        else:
+            raise ValueError(f"Unknown ResNet type: {type}")
+
+    def setup(self):
+        processor = self.load_processor(stage="test")
+
+        # Load test datasets
+        test_datasets = {
+            ds_name: CLIPDataset(self.load_test_dataset(ds_name), processor=processor)
+            for ds_name in self._test_datasets
+        }
+        self.test_dataloaders = {
+            ds_name: self.fabric.setup_dataloaders(
+                self.get_dataloader(ds, stage="test")
+            )
+            for ds_name, ds in test_datasets.items()
+        }
+
+    def _evaluate(
+        self,
+        classifier,
+        test_loader,
+        num_classes: int,
+        task_name: str = None,
+    ):
+        classifier.eval()
+        accuracy = Accuracy(task="multiclass", num_classes=num_classes)
+        loss_metric = MeanMetric()
+        if RuntimeConstants.debug:
+            log.info("Running under fast_dev_run mode, evaluating on a single batch.")
+            test_loader = itertools.islice(test_loader, 1)
+        else:
+            test_loader = test_loader
+
+        pbar = tqdm(
+            test_loader,
+            desc=f"Evaluating {task_name}" if task_name is not None else "Evaluating",
+            leave=False,
+            dynamic_ncols=True,
+        )
+        for batch in pbar:
+            inputs, targets = batch
+            outputs = classifier(inputs)
+            logits: Tensor = outputs["logits"]
+            if logits.device != targets.device:
+                targets = targets.to(logits.device)
+
+            loss = F.cross_entropy(logits, targets)
+            loss_metric.update(loss.detach().cpu())
+            acc = accuracy(logits.detach().cpu(), targets.detach().cpu())
+            pbar.set_postfix(
+                {
+                    "accuracy": accuracy.compute().item(),
+                    "loss": loss_metric.compute().item(),
+                }
+            )
+
+        acc = accuracy.compute().item()
+        loss = loss_metric.compute().item()
+        results = {"accuracy": acc, "loss": loss}
+        return results
+
+    def evaluate(
+        self,
+        model: Union["ResNetForImageClassification", "TorchVisionResNet"],
+        name: str = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        assert isinstance(
+            model, nn.Module
+        ), f"Expected model to be an instance of nn.Module, but got {type(model)}"
+
+        if not self._is_setup:
+            self.setup()
+
+        classifier = self.fabric.to_device(model)
+        classifier.eval()
+        report = {}
+        # collect basic model information
+        training_params, all_params = count_parameters(model)
+        report["model_info"] = {
+            "trainable_params": training_params,
+            "all_params": all_params,
+            "trainable_percentage": training_params / all_params,
+        }
+        if name is not None:
+            report["model_info"]["name"] = name
+
+        # evaluate on each task
+        pbar = tqdm(
+            self.test_dataloaders.items(),
+            desc="Evaluating tasks",
+            total=len(self.test_dataloaders),
+        )
+        for task_name, test_dataloader in pbar:
+            num_classes = get_num_classes(task_name)
+            result = self._evaluate(
+                classifier,
+                test_dataloader,
+                num_classes=num_classes,
+                task_name=task_name,
+            )
+            report[task_name] = result
+
+        # calculate the average accuracy and loss
+        if "average" not in report:
+            report["average"] = {}
+        accuracies = [
+            value["accuracy"]
+            for key, value in report.items()
+            if "accuracy" in value
+        ]
+        if len(accuracies) > 0:
+            average_accuracy = sum(accuracies) / len(accuracies)
+            report["average"]["accuracy"] = average_accuracy
+        losses = [value["loss"] for key, value in report.items() if "loss" in value]
+        if len(losses) > 0:
+            average_loss = sum(losses) / len(losses)
+            report["average"]["loss"] = average_loss
+
+        log.info(f"Evaluation Result: {report}")
+        if self.fabric.is_global_zero and len(self.fabric._loggers) > 0:
+            save_path = os.path.join(self.log_dir, "report.json")
+            for version in itertools.count(1):
+                if not os.path.exists(save_path):
+                    break
+                # if the file already exists, increment the version to avoid overwriting
+                save_path = os.path.join(self.log_dir, f"report_{version}.json")
+            with open(save_path, "w") as fp:
+                json.dump(report, fp)
+            log.info(f"Evaluation report saved to {save_path}")
+        return report
+
+    def get_dataloader(self, dataset, stage: str):
+        """Create a DataLoader for the specified dataset and training stage.
+
+        Constructs a PyTorch DataLoader with stage-appropriate configurations:
+        - Training stage: shuffling enabled by default
+        - Validation/test stages: shuffling disabled by default
+
+        Args:
+            dataset: The dataset to wrap in a DataLoader.
+            stage (str): Training stage, must be one of "train", "val", or "test".
+                Determines default shuffling behavior.
+
+        Returns:
+            DataLoader: Configured DataLoader for the given dataset and stage.
+        """
+        assert stage in ["train", "val", "test"], f"Invalid stage: {stage}"
+        dataloader_kwargs = dict(self.dataloader_kwargs)
+        if "shuffle" not in dataloader_kwargs:
+            dataloader_kwargs["shuffle"] = stage == "train"
+        return DataLoader(dataset, **dataloader_kwargs)
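Taken together, the new task pool evaluates a single (possibly merged) ResNet across every configured test set and reports per-task plus average accuracy and loss. A minimal sketch of driving it directly, assuming Hydra-style configs like those added under `fusion_bench_config/`; the dataset and dataloader values here are illustrative placeholders, not taken from the diff.

```python
from omegaconf import DictConfig
from transformers import ResNetForImageClassification

from fusion_bench.taskpool import ResNetForImageClassificationTaskPool

taskpool = ResNetForImageClassificationTaskPool(
    type="transformers",
    # illustrative; real dataset configs live under fusion_bench_config/taskpool
    test_datasets=DictConfig({}),
    dataloader_kwargs=DictConfig({"batch_size": 64, "num_workers": 4}),  # illustrative
    processor_config_path="microsoft/resnet-50",
)

model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")

# evaluate() returns {"model_info": ..., "<task>": {"accuracy", "loss"}, "average": ...}
# and, when loggers are attached, writes report.json under the log directory.
report = taskpool.evaluate(model, name="resnet50_pretrained")
```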