fusion-bench 0.2.30__py3-none-any.whl → 0.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. fusion_bench/__init__.py +6 -0
  2. fusion_bench/__main__.py +2 -2
  3. fusion_bench/constants/runtime.py +4 -1
  4. fusion_bench/dataset/__init__.py +2 -0
  5. fusion_bench/dataset/clip_dataset.py +4 -72
  6. fusion_bench/dataset/image_dataset.py +44 -18
  7. fusion_bench/method/base_algorithm.py +4 -0
  8. fusion_bench/method/classification/image_classification_finetune.py +1 -0
  9. fusion_bench/method/concrete_subspace/clip_concrete_tsvm.py +285 -0
  10. fusion_bench/method/dop/dop.py +0 -22
  11. fusion_bench/method/dop/dop_general.py +489 -0
  12. fusion_bench/method/dop/utils.py +24 -4
  13. fusion_bench/method/emr_merging/__init__.py +1 -0
  14. fusion_bench/method/emr_merging/emr_merging.py +53 -0
  15. fusion_bench/method/emr_merging/utils.py +162 -0
  16. fusion_bench/method/opcm/opcm.py +6 -2
  17. fusion_bench/method/opcm/opcm_general.py +356 -0
  18. fusion_bench/method/opcm/utils.py +1 -4
  19. fusion_bench/method/simple_average.py +52 -18
  20. fusion_bench/method/task_arithmetic/task_arithmetic.py +1 -1
  21. fusion_bench/method/task_singular_vector/TSVM.py +7 -6
  22. fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +0 -1
  23. fusion_bench/mixins/lightning_fabric.py +110 -11
  24. fusion_bench/mixins/openclip_classification.py +155 -1
  25. fusion_bench/mixins/serialization.py +1 -1
  26. fusion_bench/modelpool/base_pool.py +37 -0
  27. fusion_bench/modelpool/convnext_for_image_classification.py +5 -2
  28. fusion_bench/modelpool/openclip_vision/modelpool.py +12 -3
  29. fusion_bench/models/hf_clip.py +20 -0
  30. fusion_bench/models/modulator/__init__.py +1 -0
  31. fusion_bench/models/modulator/base.py +123 -0
  32. fusion_bench/models/open_clip/modeling.py +61 -5
  33. fusion_bench/models/open_clip/utils.py +13 -2
  34. fusion_bench/models/parameter_dict.py +119 -29
  35. fusion_bench/models/utils.py +190 -2
  36. fusion_bench/models/wrappers/switch.py +90 -0
  37. fusion_bench/programs/base_program.py +6 -0
  38. fusion_bench/programs/fabric_fusion_program.py +4 -0
  39. fusion_bench/py.typed +1 -0
  40. fusion_bench/scripts/cli.py +25 -23
  41. fusion_bench/scripts/imgui.py +2 -2
  42. fusion_bench/scripts/webui.py +2 -2
  43. fusion_bench/taskpool/image_classification.py +270 -0
  44. fusion_bench/utils/__init__.py +20 -1
  45. fusion_bench/utils/data.py +1 -1
  46. fusion_bench/utils/dict.py +19 -0
  47. fusion_bench/utils/dtype.py +19 -0
  48. fusion_bench/utils/hydra_utils.py +75 -0
  49. fusion_bench/utils/misc.py +1 -0
  50. fusion_bench/utils/packages.py +4 -0
  51. fusion_bench/utils/parameters.py +33 -0
  52. fusion_bench/utils/rich_utils.py +42 -19
  53. fusion_bench/utils/state_dict_arithmetic.py +183 -1
  54. fusion_bench/utils/tensorboard.py +21 -3
  55. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/METADATA +3 -1
  56. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/RECORD +70 -53
  57. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/WHEEL +1 -1
  58. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/entry_points.txt +1 -1
  59. fusion_bench_config/README.md +9 -0
  60. fusion_bench_config/fabric/auto.yaml +1 -0
  61. fusion_bench_config/fabric/loggers/mlflow_logger.yaml +4 -0
  62. fusion_bench_config/hydra/default.yaml +3 -1
  63. fusion_bench_config/method/concrete_subspace/clip_concrete_tsvm.yaml +38 -0
  64. fusion_bench_config/method/dop/dop_general.yaml +33 -0
  65. fusion_bench_config/method/emr_merging/emr_merging.yaml +1 -0
  66. fusion_bench_config/method/opcm/opcm_general.yaml +18 -0
  67. fusion_bench_config/modelpool/ConvNextForImageClassification/convnext-base-224_8-tasks.yaml +15 -0
  68. fusion_bench_config/taskpool/ImageClassificationTaskPool/convnext-base-224_8-tasks.yaml +17 -0
  69. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/licenses/LICENSE +0 -0
  70. {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.32.dist-info}/top_level.txt +0 -0

--- a/fusion_bench/method/simple_average.py
+++ b/fusion_bench/method/simple_average.py
@@ -3,11 +3,16 @@ from copy import deepcopy
 from typing import Dict, List, Mapping, Optional, Union
 
 import torch
-from torch import nn
+from torch import Tensor, nn
 
 from fusion_bench.method.base_algorithm import BaseAlgorithm
 from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
 from fusion_bench.modelpool import BaseModelPool
+from fusion_bench.models.utils import (
+    get_target_state_dict,
+    load_state_dict_into_target_modules,
+    validate_target_modules_equal,
+)
 from fusion_bench.utils import LazyStateDict
 from fusion_bench.utils.state_dict_arithmetic import (
     state_dict_add,
@@ -21,21 +26,22 @@ log = logging.getLogger(__name__)
 
 
 def simple_average(
-    modules: List[Union[nn.Module, StateDictType]],
-    base_module: Optional[nn.Module] = None,
+    modules: List[Union[nn.Module, StateDictType, Tensor]],
+    base_module: Optional[Union[nn.Module, StateDictType, Tensor]] = None,
 ):
     R"""
     Averages the parameters of a list of PyTorch modules or state dictionaries.
 
     This function takes a list of PyTorch modules or state dictionaries and returns a new module with the averaged parameters, or a new state dictionary with the averaged parameters.
 
+    If `_fusion_bench_target_modules` attribute is set on the modules, only the parameters of the specified target submodules will be averaged.
+
     Args:
-        modules (List[Union[nn.Module, StateDictType]]): A list of PyTorch modules or state dictionaries.
-        base_module (Optional[nn.Module]): A base module to use for the new module. If provided, the averaged parameters will be loaded into this module. If not provided, a new module will be created by copying the first module in the list.
+        modules (List[Union[nn.Module, StateDictType, Tensor]]): A list of PyTorch modules or state dictionaries.
+        base_module (Optional[Union[nn.Module, StateDictType, Tensor]]): A base module to use for the new module. If provided, the averaged parameters will be loaded into this module. If not provided, a new module will be created by copying the first module in the list.
 
     Returns:
-        module_or_state_dict (Union[nn.Module, StateDictType]): A new PyTorch module with the averaged parameters, or a new state dictionary with the averaged parameters.
-
+        module_or_state_dict (Union[nn.Module, StateDictType, Tensor]): A new PyTorch module with the averaged parameters, or a new state dictionary with the averaged parameters.
     Examples:
         >>> import torch.nn as nn
         >>> model1 = nn.Linear(10, 10)
@@ -47,23 +53,42 @@ def simple_average(
         >>> averaged_state_dict = simple_average([state_dict1, state_dict2])
     """
     assert len(modules) > 0, "modules must be a non-empty list"
+    validate_target_modules_equal(modules)
+
     if isinstance(modules[0], nn.Module):
         if base_module is None:
             new_module = deepcopy(modules[0])
         else:
             new_module = base_module
-        state_dict = state_dict_avg([module.state_dict() for module in modules])
-        new_module.load_state_dict(state_dict)
+        state_dict = state_dict_avg(
+            [get_target_state_dict(module) for module in modules]
+        )
+        load_state_dict_into_target_modules(new_module, state_dict)
         return new_module
     elif isinstance(modules[0], Mapping):
-        return state_dict_avg(modules)
+        # if the modules are state dicts
+        # compute the average state dict
+        avg_state_dict = state_dict_avg(modules)
+        # load into base_module if provided
+        if base_module is not None:
+            for k in avg_state_dict:
+                base_module[k] = avg_state_dict[k]
+            return base_module
+        else:
+            return avg_state_dict
+    elif isinstance(modules[0], Tensor):
+        mean_tensor = torch.stack(modules, dim=0).mean(dim=0)
+        if base_module is not None:
+            base_module.data = mean_tensor
+            return base_module
+        else:
+            return mean_tensor
+    else:
+        raise ValueError(f"Unsupported type: {type(modules[0])}")
 
 
 @auto_register_config
-class SimpleAverageAlgorithm(
-    SimpleProfilerMixin,
-    BaseAlgorithm,
-):
+class SimpleAverageAlgorithm(SimpleProfilerMixin, BaseAlgorithm):
     def __init__(self, show_pbar: bool = False, inplace: bool = True, **kwargs):
         """
         Args:
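
Usage note (not part of the diff): a minimal sketch of the extended `simple_average`, written against the branches shown above. It assumes the new target-module helpers fall back to the full model when `_fusion_bench_target_modules` is not set, as the docstring implies, and that all input tensors share a shape.

import torch
import torch.nn as nn

from fusion_bench.method.simple_average import simple_average

# nn.Module inputs: a copy of the first module is returned with averaged weights.
model1, model2 = nn.Linear(10, 10), nn.Linear(10, 10)
merged_module = simple_average([model1, model2])

# State-dict inputs: an averaged state dict is returned (or written into base_module).
merged_sd = simple_average([model1.state_dict(), model2.state_dict()])

# Tensor inputs are new in 0.2.32: tensors are stacked and averaged element-wise.
t1, t2 = torch.randn(3, 4), torch.randn(3, 4)
assert torch.allclose(simple_average([t1, t2]), (t1 + t2) / 2)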
@@ -87,13 +112,20 @@ class SimpleAverageAlgorithm(
         Returns:
             The fused model obtained by simple averaging.
         """
-        if isinstance(modelpool, dict):
+        if not isinstance(modelpool, BaseModelPool):
             modelpool = BaseModelPool(modelpool)
 
         log.info(
             f"Fusing models using simple average on {len(modelpool.model_names)} models. "
             f"models: {modelpool.model_names}"
         )
+        if modelpool.has_instance_models and self.inplace:
+            log.warning(
+                "The model pool contains instance models, and inplace is set to True. "
+                "Therefore, the weights of the first model will be overwritten. "
+                "If this is desired behavior, this warning can be ignored."
+            )
+
         sd: Optional[StateDictType] = None
         forward_model = None
         merged_model_names = []
@@ -106,12 +138,12 @@
             with self.profile("merge weights"):
                 if sd is None:
                     # Initialize the state dictionary with the first model's state dictionary
-                    sd = model.state_dict()
+                    sd = get_target_state_dict(model)
                     forward_model = model if self.inplace else deepcopy(model)
                 else:
                     # Add the current model's state dictionary to the accumulated state dictionary
                     sd = state_dict_add(
-                        sd, model.state_dict(), show_pbar=self.show_pbar
+                        sd, get_target_state_dict(model), show_pbar=self.show_pbar
                     )
         with self.profile("merge weights"):
             # Divide the accumulated state dictionary by the number of models to get the average
@@ -124,11 +156,13 @@
             forward_model = deepcopy(forward_model.meta_module).to_empty(
                 device=forward_model._device
             )
-        result = forward_model.load_state_dict(sd, strict=False)
+
+        result = load_state_dict_into_target_modules(forward_model, sd, strict=False)
         if result.unexpected_keys:
             raise ValueError(f"Unexpected keys in state dict: {result.unexpected_keys}")
         if result.missing_keys:
             log.warning(f"Missing keys in state dict: {result.missing_keys}")
+
         # print profile report and log the merged models
         self.print_profile_summary()
         log.info(f"merged {len(merged_model_names)} models:")
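
And a hedged sketch of the class-based interface: `run` wraps anything that is not already a `BaseModelPool`, so a plain dict of instantiated models can be passed directly (how `load_model` resolves such instance entries is assumed here, not shown in this hunk).

import torch.nn as nn

from fusion_bench.method.simple_average import SimpleAverageAlgorithm

models = {"task_a": nn.Linear(8, 8), "task_b": nn.Linear(8, 8)}

# inplace=False sidesteps the warning above: the first model is deep-copied
# before the averaged weights are loaded into it.
algorithm = SimpleAverageAlgorithm(show_pbar=False, inplace=False)
merged = algorithm.run(models)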

--- a/fusion_bench/method/task_arithmetic/task_arithmetic.py
+++ b/fusion_bench/method/task_arithmetic/task_arithmetic.py
@@ -50,7 +50,7 @@ def task_arithmetic_merge(
         finetuned_models (List[nn.Module]): A list of fine-tuned models from which task vectors will be calculated.
         scaling_factor (float): A factor by which the task vectors will be scaled before merging.
         inplace (bool, optional): If True, the pre-trained model will be modified in place.
-        If False, a copy of the pre-trained model will be modified. Defaults to True.
+            If False, a copy of the pre-trained model will be modified. Defaults to True.
 
     Returns:
         nn.Module: The pre-trained model with the merged task vectors.

--- a/fusion_bench/method/task_singular_vector/TSVM.py
+++ b/fusion_bench/method/task_singular_vector/TSVM.py
@@ -249,12 +249,13 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):
         # - SVD finds the principal components (most important directions)
         # - Task vectors are reconstructed using only the most significant components
         # - The reconstructed vectors are merged (summed) to create a unified task vector
-        new_merged_tv = TSVM_utils.compute_and_sum_svd_mem_reduction(
-            task_vectors,
-            exclude_keys=self.exclude_keys,  # Skip certain parameters from SVD
-            accelerator=accelerator,  # Use GPU if available
-            return_single_task_models=self.return_single_task_models,
-        )
+        with torch.no_grad():
+            new_merged_tv = TSVM_utils.compute_and_sum_svd_mem_reduction(
+                task_vectors,
+                exclude_keys=self.exclude_keys,  # Skip certain parameters from SVD
+                accelerator=accelerator,  # Use GPU if available
+                return_single_task_models=self.return_single_task_models,
+            )
 
         # Handle the case where individual transformed task vectors are also returned
         if self.return_single_task_models:

--- a/fusion_bench/method/task_singular_vector/utils/TSVM_utils.py
+++ b/fusion_bench/method/task_singular_vector/utils/TSVM_utils.py
@@ -311,7 +311,6 @@ def compute_and_sum_svd_mem_reduction_lossless_eigen(
 
 ###############
 #### TSV Merge Orthogonalization
-@torch.no_grad()
 def compute_and_sum_svd_mem_reduction(
     task_vectors: List[StateDictType],
     exclude_keys: Optional[List[str]] = None,
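
The two hunks above move gradient disabling from the utility (the dropped `@torch.no_grad()` decorator) to the call site in `TaskSingularVectorMerging`. The underlying PyTorch pattern, illustrated here on a toy function rather than the real TSVM code, keeps the routine reusable where gradients are wanted while the merging path still avoids building an autograd graph:

import torch

def scale_task_vectors(task_vectors, coef=0.5):
    # purely numerical work; whether autograd tracks it is now the caller's choice
    return [coef * tv for tv in task_vectors]

task_vectors = [torch.randn(4, 4, requires_grad=True) for _ in range(3)]

with torch.no_grad():
    merged = scale_task_vectors(task_vectors)

assert all(not t.requires_grad for t in merged)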

--- a/fusion_bench/mixins/lightning_fabric.py
+++ b/fusion_bench/mixins/lightning_fabric.py
@@ -1,6 +1,7 @@
 import functools
 import logging
 import os
+import sys
 from typing import TYPE_CHECKING, Any, List, Mapping, Optional, TypeVar
 
 import lightning as L
@@ -10,18 +11,34 @@ from lightning.fabric.loggers import TensorBoardLogger
 from lightning.fabric.utilities.rank_zero import rank_zero_only
 from omegaconf import DictConfig, OmegaConf
 
+from fusion_bench.constants import RuntimeConstants
 from fusion_bench.utils import import_object
+from fusion_bench.utils.hydra_utils import get_hydra_output_dir
 from fusion_bench.utils.instantiate_utils import instantiate
 
 if TYPE_CHECKING:
     import lightning.fabric.loggers.tensorboard
     from lightning.fabric.strategies import FSDPStrategy
+    from lightning.pytorch.loggers import MLFlowLogger
+    from mlflow.tracking.client import MlflowClient
 
 log = logging.getLogger(__name__)
 
 TensorOrModule = TypeVar("TensorOrModule", torch.Tensor, torch.nn.Module, Any)
 
 
+def _fabric_has_logger(fabric: L.Fabric) -> bool:
+    """
+    Check if the fabric has a logger.
+
+    Args:
+        fabric (L.Fabric): The Lightning Fabric instance.
+    Returns:
+        bool: True if the fabric has a logger, False otherwise.
+    """
+    return fabric._loggers is not None and len(fabric._loggers) > 0
+
+
 def get_policy(*args: str) -> set:
     """
     Get the policy from the provided list of policy names.
@@ -42,6 +59,21 @@ def get_size_based_auto_wrap_policy(*args, **kwargs):
     return policy
 
 
+def _is_mlflow_logger(fabric: L.Fabric) -> bool:
+    """
+    Check if the fabric's logger is an instance of MLFlowLogger.
+
+    Args:
+        fabric (L.Fabric): The Lightning Fabric instance.
+
+    Returns:
+        bool: True if the logger is an instance of MLFlowLogger, False otherwise.
+    """
+    if not _fabric_has_logger(fabric):
+        return False
+    return fabric.logger.__class__.__name__ == "MLFlowLogger"
+
+
 class LightningFabricMixin:
     """
     A mixin class for integrating Lightning Fabric into a project.
@@ -78,8 +110,8 @@
         """
         if self._fabric_instance is None:
            if config.get("fabric", None) is None:
-                log.warning("No fabric configuration found. use default settings.")
-                self._fabric_instance = L.Fabric()
+                log.warning("No fabric configuration found. use default settings. By default, use 1 device.")
+                self._fabric_instance = L.Fabric(devices=1)
            else:
                self._fabric_instance = instantiate(config.fabric)
            if not _is_using_cli():  # if not using cli, launch the fabric
@@ -122,7 +154,10 @@
         Retrieves the log directory from the fabric's logger.
         """
         if self.fabric is not None and len(self.fabric._loggers) > 0:
-            log_dir = self.fabric.logger.log_dir
+            if hasattr(self.fabric.logger, "log_dir"):
+                log_dir = self.fabric.logger.log_dir
+            else:
+                log_dir = None
 
             # Special handling for SwanLabLogger to get the correct log directory
             if (
@@ -131,6 +166,20 @@
             ):
                 log_dir = self.fabric.logger.save_dir or self.fabric.logger._logdir
 
+            if (
+                log_dir is None
+                and self.fabric.logger.__class__.__name__ == "MLFlowLogger"
+            ):
+                log_dir = self.fabric.logger.save_dir
+                if log_dir is None:
+                    try:
+                        log_dir = self._program.config.path.log_dir
+                    except Exception:
+                        log.error(
+                            "Failed to get log_dir from program config for MLFlowLogger."
+                        )
+                        log_dir = "outputs"
+
         assert log_dir is not None, "log_dir should not be None"
         if self.fabric.is_global_zero and not os.path.exists(log_dir):
             os.makedirs(log_dir, exist_ok=True)
@@ -206,14 +255,7 @@
         Returns:
             bool: True if fast_dev_run is enabled, False otherwise.
         """
-        if hasattr(self, "config") and self.config.get("fast_dev_run", False):
-            return True
-        elif hasattr(self, "_program") and self._program.config.get(
-            "fast_dev_run", False
-        ):
-            return True
-        else:
-            return False
+        return RuntimeConstants().debug
 
     def log(self, name: str, value: Any, step: Optional[int] = None):
         """
@@ -252,3 +294,60 @@
         """
         for i, param_group in enumerate(optimizer.param_groups):
             self.fabric.log(name_template.format(i), param_group["lr"], step=step)
+
+    def log_artifact(self, local_path: str, artifact_path: str | None = None):
+        """
+        Logs a file as an artifact to the fabric's logger.
+
+        Args:
+            local_dir: The path to the directory to log as an artifact.
+            artifact_path: The directory within the logger's artifact storage to save the file.
+        """
+        if _is_mlflow_logger(self.fabric):
+            logger: "MLFlowLogger" = self.fabric.logger
+            experiment: "MlflowClient" = logger.experiment
+            experiment.log_artifact(
+                logger.run_id,
+                local_path=local_path,
+                artifact_path=(artifact_path),
+            )
+
+    def log_artifacts(self, local_dir: str, artifact_path: str | None = None):
+        """
+        Logs a directory as artifacts to the fabric's logger.
+
+        Args:
+            local_dir: The path to the directory to log as artifacts.
+            artifact_path: The directory within the logger's artifact storage to save the files.
+        """
+        if _is_mlflow_logger(self.fabric):
+            logger: "MLFlowLogger" = self.fabric.logger
+            experiment: "MlflowClient" = logger.experiment
+            experiment.log_artifacts(
+                logger.run_id,
+                local_dir=local_dir,
+                artifact_path=artifact_path,
+            )
+
+    def finalize(self):
+        """
+        Destructor to ensure proper cleanup of the Lightning Fabric instance.
+        """
+        if self._fabric_instance is None:
+            return
+
+        if _fabric_has_logger(self.fabric) and _is_mlflow_logger(self.fabric):
+            if sys.exc_info()[0] is None:
+                status = "success"
+            else:
+                status = "failed"
+            self.fabric.logger.finalize(status)
+
+        del self._fabric_instance
+        self._fabric_instance = None
+
+    def __del__(self):
+        """
+        Destructor to ensure proper cleanup of the Lightning Fabric instance.
+        """
+        self.finalize()
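
A hedged sketch of the new artifact hooks from an algorithm's point of view. It assumes the mixin's `log_dir` property resolves to a writable directory and that the fabric is configured with Lightning's `MLFlowLogger` (a matching config file, fusion_bench_config/fabric/loggers/mlflow_logger.yaml, is added in this release); with any other logger, both calls are silently skipped by the `_is_mlflow_logger` guard.

import json
import os

from fusion_bench.method.base_algorithm import BaseAlgorithm
from fusion_bench.mixins import LightningFabricMixin


class ReportLoggingAlgorithm(LightningFabricMixin, BaseAlgorithm):
    """Hypothetical algorithm that writes a metrics file and uploads it to MLflow."""

    def run(self, modelpool):
        report_path = os.path.join(self.log_dir, "report.json")
        with open(report_path, "w") as f:
            json.dump({"num_models": len(modelpool.model_names)}, f)

        self.log_artifact(report_path)                         # single file
        self.log_artifacts(self.log_dir, artifact_path="run")  # whole directory
        return modelpool.load_model(modelpool.model_names[0])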

--- a/fusion_bench/mixins/openclip_classification.py
+++ b/fusion_bench/mixins/openclip_classification.py
@@ -1,11 +1,165 @@
+import functools
 import logging
+from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Literal, Optional
 
+import torch
+from omegaconf import DictConfig
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from fusion_bench.dataset.clip_dataset import CLIPDataset
 from fusion_bench.mixins import LightningFabricMixin
-from fusion_bench.models.open_clip import ImageClassifier, ImageEncoder
+from fusion_bench.modelpool import OpenCLIPVisionModelPool
+from fusion_bench.models.open_clip import (
+    ClassificationHead,
+    ImageClassifier,
+    ImageEncoder,
+)
+from fusion_bench.utils.data import InfiniteDataLoader
 
 log = logging.getLogger(__name__)
 
 
 class OpenCLIPClassificationMixin(LightningFabricMixin):
+
     _train_processor = None
     _test_processor = None
+    dataloader_kwargs: DictConfig
+    modelpool: OpenCLIPVisionModelPool
+    zero_shot_heads: Dict[str, ClassificationHead] = {}
+
+    def _init_processor(self, encoder: Optional["ImageEncoder"] = None):
+        """
+        Initialize the CLIP processors for training and testing.
+        """
+        if encoder is None:
+            encoder: "ImageEncoder" = self.modelpool.load_pretrained_or_first_model()
+        self._train_processor = encoder.train_preprocess
+        self._test_processor = encoder.val_preprocess
+        return self._train_processor, self._test_processor
+
+    def get_clip_processor(self, stage: Literal["train", "test"]):
+        """
+        Get the CLIP processor, loading it from the model pool if necessary.
+
+        Returns:
+            CLIPProcessor: The CLIP processor for image and text preprocessing.
+
+        Raises:
+            AssertionError: If the model pool is not set.
+        """
+        if stage == "train":
+            if self._train_processor is None:
+                self._init_processor()
+            return self._train_processor
+        elif stage == "test":
+            if self._test_processor is None:
+                self._init_processor()
+            return self._test_processor
+        else:
+            raise ValueError(f"Invalid stage: {stage}")
+
+    def setup_zero_shot_classification_head(
+        self,
+        task_names: Optional[List[str]] = None,
+        freeze: bool = True,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        # check task names consistency across processes
+        _task_names = self.fabric.broadcast(task_names, src=0)
+        if not self.fabric.is_global_zero and task_names != _task_names:
+            raise ValueError("The `task_names` must be the same across all processes.")
+
+        for task in tqdm(
+            self.modelpool.model_names if task_names is None else task_names,
+            "Setting up zero-shot classification head",
+            disable=not self.fabric.is_global_zero,
+        ):
+            head = self.modelpool.load_classification_head(task)
+            if freeze:
+                head.requires_grad_(False)
+            if dtype is not None:
+                head = head.to(dtype=dtype)
+            self.zero_shot_heads[task] = self.to_device(head)
+
+    def set_clip_processor(self, stage: Literal["train", "test"], processor: Callable):
+        """
+        Set the CLIP processor for a specific stage.
+
+        Args:
+            stage (Literal["train", "test"]): The stage for which to set the processor.
+            processor (Callable): The CLIP processor to set.
+        """
+        if stage == "train":
+            self._train_processor = processor
+        elif stage == "test":
+            self._test_processor = processor
+        else:
+            raise ValueError(f"Invalid stage: {stage}")
+
+    @functools.cache
+    def get_shuffled_test_loader_iter(
+        self,
+        task: str,
+        batch_size: Optional[int] = None,
+        num_workers: Optional[int] = None,
+        **loader_kwargs,
+    ) -> Iterator:
+        """
+        Get an iterator for a shuffled test DataLoader.
+
+        This method creates a DataLoader for the test dataset of the specified task,
+        with shuffling enabled. It allows for optional customization of batch size,
+        number of workers, and other DataLoader keyword arguments.
+
+        Args:
+            task (str): The task identifier for which the test dataset is to be loaded.
+            batch_size (Optional[int]): The batch size to use for the DataLoader. If None, the default batch size is used.
+            num_workers (Optional[int]): The number of worker processes to use for data loading. If None, the default number of workers is used.
+            **loader_kwargs: Additional keyword arguments to pass to the DataLoader.
+
+        Returns:
+            Iterator: An iterator over the shuffled test DataLoader.
+        """
+        # get dataloader kwargs
+        dataloader_kwargs = self.dataloader_kwargs.copy()
+        dataloader_kwargs["shuffle"] = True
+        if batch_size is not None:
+            dataloader_kwargs["batch_size"] = batch_size
+        if num_workers is not None:
+            dataloader_kwargs["num_workers"] = num_workers
+        dataloader_kwargs.update(loader_kwargs)
+
+        # get the test dataset
+        clip_dataset = CLIPDataset(
+            self.modelpool.load_test_dataset(task),
+            processor=self.get_clip_processor(stage="test"),
+        )
+        # create the dataloader
+        loader = DataLoader(clip_dataset, **dataloader_kwargs)
+        loader = self.fabric.setup_dataloaders(loader)
+        return iter(InfiniteDataLoader(loader))
+
+    def compute_logits(
+        self,
+        module: ImageClassifier,
+        images,
+        task: str,
+    ):
+        """
+        Compute the logits for a batch of images using the provided module and task.
+
+        Args:
+            module (ImageClassifier): The image classification module to use for computing logits.
+            images (torch.Tensor): The batch of images for which to compute logits.
+            task (str): The task identifier to specify which classification head to use.
+
+        Returns:
+            torch.Tensor: The computed logits for the input images.
+        """
+        if len(self.zero_shot_heads) == 0:
+            self.setup_zero_shot_classification_head()
+        task_head = self.zero_shot_heads[task]
+        features = module(images)
+        logits = task_head(features)
+        return logits
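
A hedged sketch of how an algorithm built on this mixin might use the new helpers. It assumes `self.modelpool` and `self.dataloader_kwargs` are populated by the surrounding program, and that the wrapped `CLIPDataset` yields `(images, labels)` batches.

import torch

from fusion_bench.mixins.openclip_classification import OpenCLIPClassificationMixin


class QuickEvalSketch(OpenCLIPClassificationMixin):
    @torch.no_grad()
    def quick_accuracy(self, encoder, task: str, num_batches: int = 10) -> float:
        """Estimate accuracy on a few shuffled test batches of `task`."""
        loader_iter = self.get_shuffled_test_loader_iter(task, batch_size=32)
        correct = total = 0
        for _ in range(num_batches):
            images, labels = next(loader_iter)
            logits = self.compute_logits(encoder, images, task)
            correct += (logits.argmax(dim=-1) == labels).sum().item()
            total += labels.numel()
        return correct / total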

--- a/fusion_bench/mixins/serialization.py
+++ b/fusion_bench/mixins/serialization.py
@@ -68,7 +68,7 @@ def auto_register_config(cls):
 
    Behavior:
    - **Parameter Registration**: All non-variadic parameters (excluding ``*args``, ``**kwargs``)
-    from the __init__ method are automatically added to _config_mapping
+      from the __init__ method are automatically added to _config_mapping
    - **Positional Arguments**: Handled in order and mapped to corresponding parameter names
    - **Keyword Arguments**: Processed after positional arguments, overriding any conflicts
    - **Default Values**: Applied when parameters are not provided via arguments
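
A hedged toy example of the decorator's effect as described above; reading `self.scaling_factor` and `self.show_pbar` in `run` mirrors how `SimpleAverageAlgorithm` earlier in this diff uses `self.show_pbar` and `self.inplace` without assigning them in `__init__`.

from fusion_bench.method.base_algorithm import BaseAlgorithm
from fusion_bench.mixins import auto_register_config


@auto_register_config
class ScaledAverageSketch(BaseAlgorithm):
    def __init__(self, scaling_factor: float = 0.3, show_pbar: bool = False, **kwargs):
        # scaling_factor and show_pbar are registered in _config_mapping (and, per
        # the usage pattern above, exposed as attributes) without manual bookkeeping.
        super().__init__(**kwargs)

    def run(self, modelpool):
        print(self.scaling_factor, self.show_pbar)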

--- a/fusion_bench/modelpool/base_pool.py
+++ b/fusion_bench/modelpool/base_pool.py
@@ -7,10 +7,12 @@ from omegaconf import DictConfig, OmegaConf, UnsupportedValueType
 from torch import nn
 from torch.utils.data import Dataset
 
+from fusion_bench import StateDictType, TorchModelType
 from fusion_bench.mixins import BaseYAMLSerializable, HydraConfigMixin
 from fusion_bench.utils import (
     ValidationError,
     instantiate,
+    state_dict_sub,
     timeit_context,
     validate_model_name,
 )
@@ -56,6 +58,10 @@ class BaseModelPool(
         **kwargs,
     ):
         if isinstance(models, List):
+            log.debug(
+                "Initializing BaseModelPool with a list of models. "
+                "Converting to a dictionary with integer string keys."
+            )
             models = {str(model_idx): model for model_idx, model in enumerate(models)}
 
         if isinstance(models, dict):
@@ -80,6 +86,22 @@ class BaseModelPool(
         self._test_datasets = test_datasets
         super().__init__(**kwargs)
 
+    @property
+    def has_instance_models(self) -> bool:
+        """
+        Check if the model pool contains any pre-instantiated models.
+
+        Attention:
+            Some algorithms may modify the models in-place if they are pre-instantiated.
+
+        Returns:
+            bool: True if there are pre-instantiated models, False otherwise.
+        """
+        for model_cfg in self._models.values():
+            if isinstance(model_cfg, nn.Module):
+                return True
+        return False
+
     @property
     def has_pretrained(self) -> bool:
         """
@@ -328,6 +350,21 @@
         for model_name in self.model_names:
             yield model_name, self.load_model(model_name)
 
+    def load_pretrained_model_and_task_vectors(
+        self,
+    ) -> Tuple[TorchModelType, List[StateDictType]]:
+        pretrained_model = self.load_pretrained_model()
+
+        task_vectors = []
+        for model_name in self.model_names:
+            finetuned_model = self.load_model(model_name)
+            task_vector = state_dict_sub(
+                finetuned_model.state_dict(), pretrained_model.state_dict()
+            )
+            task_vectors.append(task_vector)
+
+        return pretrained_model, task_vectors
+
     @property
     def has_train_dataset(self) -> bool:
         """

--- a/fusion_bench/modelpool/convnext_for_image_classification.py
+++ b/fusion_bench/modelpool/convnext_for_image_classification.py
@@ -98,7 +98,7 @@ class ConvNextForImageClassificationPool(BaseModelPool):
     - Load ConvNeXt models either from a pretrained checkpoint or from config.
     - Optionally adapt the classifier head to match dataset classnames.
     - Override `forward` to return logits for consistent interfaces within
-        FusionBench.
+      FusionBench.
 
     See `fusion_bench.modelpool.resnet_for_image_classification` for a closely
     related ResNet-based pool with analogous behavior.
@@ -161,6 +161,9 @@ class ConvNextForImageClassificationPool(BaseModelPool):
             ).logits
         model.original_forward = original_forward
 
+        # Mark ConvNeXt layers for FusionBench fusion
+        model._fusion_bench_target_modules = ["convnext"]
+
         return model
 
     @override
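
The marker set above feeds the target-module-aware averaging added to `simple_average` earlier in this diff. A hedged sketch of the intended effect, loading the public facebook/convnext-base-224 checkpoint twice purely for illustration (in practice the inputs would be differently fine-tuned models); per the docstring, only the marked `convnext` backbone is averaged, so the returned copy presumably keeps the first model's classifier head.

from transformers import ConvNextForImageClassification

from fusion_bench.method.simple_average import simple_average

model_a = ConvNextForImageClassification.from_pretrained("facebook/convnext-base-224")
model_b = ConvNextForImageClassification.from_pretrained("facebook/convnext-base-224")

for m in (model_a, model_b):
    m._fusion_bench_target_modules = ["convnext"]  # average the backbone only

merged = simple_average([model_a, model_b])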
@@ -180,7 +183,7 @@ class ConvNextForImageClassificationPool(BaseModelPool):
         - The ConvNeXt model via `model.save_pretrained`.
         - The paired image processor via `AutoImageProcessor.save_pretrained`.
         - If `algorithm_config` is provided and on rank-zero, a README model card
-        documenting the FusionBench configuration.
+          documenting the FusionBench configuration.
         """
         model.save_pretrained(path)
         self.load_processor().save_pretrained(path)
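
Finally, a hedged sketch tying the `BaseModelPool` additions from earlier in this diff together: `load_pretrained_model_and_task_vectors` combined with plain task arithmetic. Here `modelpool` stands for an already-constructed pool that defines a pretrained model alongside its fine-tuned models, and the scaling coefficient 0.3 is only a placeholder.

from fusion_bench.utils.state_dict_arithmetic import state_dict_add

pretrained_model, task_vectors = modelpool.load_pretrained_model_and_task_vectors()

# Sum the task vectors, then apply them to the pretrained weights at a chosen scale.
merged_tv = task_vectors[0]
for tv in task_vectors[1:]:
    merged_tv = state_dict_add(merged_tv, tv)

lam = 0.3
merged_sd = {
    name: (param + lam * merged_tv[name]) if name in merged_tv else param
    for name, param in pretrained_model.state_dict().items()
}
pretrained_model.load_state_dict(merged_sd)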