fusion-bench 0.2.30__py3-none-any.whl → 0.2.31__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.
- fusion_bench/constants/runtime.py +4 -1
- fusion_bench/method/classification/image_classification_finetune.py +1 -0
- fusion_bench/method/concrete_subspace/clip_concrete_tsvm.py +285 -0
- fusion_bench/method/task_singular_vector/TSVM.py +7 -6
- fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +0 -1
- fusion_bench/mixins/lightning_fabric.py +2 -8
- fusion_bench/mixins/openclip_classification.py +155 -1
- fusion_bench/modelpool/base_pool.py +1 -0
- fusion_bench/modelpool/openclip_vision/modelpool.py +12 -3
- fusion_bench/models/open_clip/modeling.py +61 -5
- fusion_bench/models/open_clip/utils.py +13 -2
- fusion_bench/py.typed +1 -0
- fusion_bench/scripts/cli.py +7 -16
- fusion_bench/scripts/imgui.py +2 -2
- fusion_bench/scripts/webui.py +2 -2
- fusion_bench/utils/__init__.py +2 -0
- fusion_bench/utils/hydra_utils.py +75 -0
- fusion_bench/utils/parameters.py +33 -0
- fusion_bench/utils/rich_utils.py +42 -19
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.31.dist-info}/METADATA +1 -1
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.31.dist-info}/RECORD +29 -26
- fusion_bench_config/README.md +9 -0
- fusion_bench_config/fabric/auto.yaml +1 -0
- fusion_bench_config/hydra/default.yaml +3 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_tsvm.yaml +38 -0
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.31.dist-info}/WHEEL +0 -0
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.31.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.31.dist-info}/licenses/LICENSE +0 -0
- {fusion_bench-0.2.30.dist-info → fusion_bench-0.2.31.dist-info}/top_level.txt +0 -0
`fusion_bench/constants/runtime.py`:

```diff
@@ -89,7 +89,10 @@ class RuntimeConstants:
         self._initialized = True
 
     debug = False
-    """
+    """
+    Global debug flag for enabling verbose logging and debugging features.
+    Use `RuntimeConstants().debug` instead of `RuntimeConstants.debug`
+    """
 
     @property
     def cache_dir(self) -> Path:
```
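
The new docstring's advice is worth heeding: the `_initialized` guard above suggests `RuntimeConstants` behaves as a process-wide singleton, so instance access reads the live, shared value while class access only ever sees the class-level default. A minimal sketch of the distinction (hypothetical usage, not taken from the package):

```python
from fusion_bench.constants import RuntimeConstants

RuntimeConstants().debug = True  # flip the flag on the shared singleton instance

RuntimeConstants().debug  # True  -- instance access sees the live value
RuntimeConstants.debug    # False -- class access still sees the default
```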
`fusion_bench/method/concrete_subspace/clip_concrete_tsvm.py` (new file):

```diff
@@ -0,0 +1,285 @@
+import logging
+import os
+from copy import deepcopy
+from typing import TYPE_CHECKING, Dict, Iterable, List, Literal, Optional
+
+import torch
+from omegaconf import DictConfig
+from tqdm import tqdm
+
+from fusion_bench import (
+    BaseAlgorithm,
+    OpenCLIPClassificationMixin,
+    OpenCLIPVisionModelPool,
+    SimpleProfilerMixin,
+    StateDictType,
+    auto_register_config,
+    get_rankzero_logger,
+    instantiate,
+)
+from fusion_bench.method.adamerging.entropy_loss import entropy_loss
+from fusion_bench.method.task_singular_vector import TaskSingularVectorMerging
+from fusion_bench.method.task_singular_vector.utils import (
+    TSVM_utils,
+    check_parameterNamesMatch,
+    check_state_dicts_equal,
+    state_dict_to_vector,
+    vector_to_state_dict,
+)
+from fusion_bench.models.masks import MaskModel, mask_sparsity
+from fusion_bench.models.open_clip import (
+    ClassificationHead,
+    ImageClassifier,
+    ImageEncoder,
+)
+from fusion_bench.models.wrappers.task_wise_fusion import (
+    TaskWiseMergedModel,
+    get_task_wise_weights,
+)
+from fusion_bench.utils.devices import clear_cuda_cache
+from fusion_bench.utils.dtype import parse_dtype
+from fusion_bench.utils.parameters import print_parameters, print_trainable_parameters
+from fusion_bench.utils.rich_utils import print_config_yaml
+from fusion_bench.utils.state_dict_arithmetic import (
+    _validate_state_dict_same_keys,
+    state_dict_add,
+    state_dict_hadamard_product,
+    state_dict_mul,
+    state_dict_sub,
+)
+
+log = get_rankzero_logger(__name__)
+
+
+@auto_register_config
+class ConcreteTSVMForOpenCLIP(
+    OpenCLIPClassificationMixin,
+    SimpleProfilerMixin,
+    BaseAlgorithm,
+):
+    def __init__(
+        self,
+        dataloader_kwargs: DictConfig,
+        optimizer: DictConfig,
+        lr_scheduler: DictConfig,
+        max_steps: int,
+        save_interval: int,
+        initial_logits: float,
+        temperature: float,
+        eval_mask_type: Literal["continuous", "discrete"],
+        mask_checkpoint: Optional[str],
+        merge_dtype: str,
+        clamp_weights: bool,
+        tie_weights: bool,
+        strict: bool,
+        skip_training: bool,
+        # === TSVM parameters ===
+        exclude_keys: Optional[List[str]],
+        alpha: float,
+        return_single_task_models: bool = True,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        if not return_single_task_models:
+            log.warning("return_single_task_models is forced to be True here.")
+            self.return_single_task_models = True
+
+    @torch.no_grad()
+    def setup_models(self):
+        """
+        load the pre-trained model, task vectors, and construct the mask model.
+        """
+        merge_dtype = parse_dtype(self.merge_dtype)
+        modelpool = self.modelpool
+
+        # load the pre-trained model
+        pretrained_model = modelpool.load_pretrained_model()
+        self.set_clip_processor(stage="test", processor=pretrained_model.val_preprocess)
+
+        # constrcute mask model
+        mask_model = MaskModel(
+            pretrained_model, ignore_untrained_params=True, parameter_type="logits"
+        )
+        if merge_dtype is not None:
+            mask_model.to(merge_dtype)
+        mask_model.fill_(self.initial_logits)
+
+        if self.fabric.is_global_zero:
+            print("summary of mask model:")
+            print_parameters(mask_model)
+
+        if self.fabric.is_global_zero:
+            tsvm_algo = TaskSingularVectorMerging(
+                alpha=self.alpha,
+                exclude_keys=self.exclude_keys,
+                return_single_task_models=self.return_single_task_models,
+            )
+            tsvm_algo._fabric_instance = self.fabric
+            models = tsvm_algo.run(modelpool)
+
+            finetuned_models = [models[name] for name in modelpool.model_names]
+
+            task_wise_weight = get_task_wise_weights(
+                num_models=len(modelpool.model_names),
+                init_values=self.alpha,
+            )
+
+            # create a wrapped model
+            module = TaskWiseMergedModel(
+                task_wise_weight=task_wise_weight,
+                pretrained_model=pretrained_model,
+                finetuned_models=finetuned_models,
+                clamp_weights=self.clamp_weights,
+                tie_weights=self.tie_weights,
+                strict=self.strict,
+                task_vector_dtype=merge_dtype,
+            )
+            module = module.to(dtype=merge_dtype)
+
+            print("trainable parameter summary of merged model (TaskWiseMergedModel):")
+            print_trainable_parameters(module)
+        else:
+            module = None
+
+        with torch.no_grad():
+            self.fabric.barrier()
+            module = self.fabric.broadcast(module, src=0)
+
+        return module, mask_model
+
+    def train_mask(self, module: TaskWiseMergedModel, mask_model: MaskModel):
+        """
+        Train the mask model using the provided module.
+
+        This method configures the optimizer, sets up the mask model, and performs test-time adaptation to train the mask model.
+
+        Args:
+            module (TaskWiseMergedModel): The wrapped model with task-wise weights.
+            mask_model (MaskModel): The mask model to be trained.
+        """
+        config = self.config
+        merge_dtype = parse_dtype(self.merge_dtype)
+        log.info(f"Using merge dtype: {merge_dtype}")
+
+        optimizer: "torch.optim.Optimizer" = instantiate(
+            self.optimizer,
+            params=filter(lambda p: p.requires_grad, mask_model.parameters()),
+        )
+        print(f"{optimizer=}")
+        if self.lr_scheduler is not None:
+            lr_scheduler = instantiate(
+                self.lr_scheduler,
+                optimizer=optimizer,
+            )
+            print(f"{lr_scheduler=}")
+        else:
+            lr_scheduler = None
+
+        log.info("Setup models and optimizer with Fabric.")
+        mask_model, optimizer = self.fabric.setup(mask_model, optimizer)
+
+        log.info("Move the merged module to the correct device and disable gradients.")
+        module.requires_grad_(False)
+        module.to(mask_model.device)
+
+        mask_model.train()
+        optimizer.zero_grad()
+        for step_idx in (
+            pbar := tqdm(
+                range(self.config.max_steps if not self.is_debug_mode else 5),
+                ("[DEBUG MODE] " if self.is_debug_mode else "")
+                + "Concrete TSVM Test-Time Adaptation",
+                dynamic_ncols=True,
+                disable=not self.fabric.is_global_zero,
+            )
+        ):
+            metrics = {}
+            # sample a shared mask and merge weights
+            with self.profile("sample mask"):
+                mask = mask_model.sample_mask(
+                    mask_type="continuous", temperature=config.temperature
+                )
+                metrics["train/sparsity"] = mask_sparsity(mask)
+            with self.profile("merge weights"):
+                # rescale mask
+                for name, m in mask.items():
+                    mask[name] = m / torch.mean(m)
+                module.merge_weights(task_vector_mask=mask)
+
+            # ------ inner optimization goes here ------
+            # NOTE:
+            # Because the algorithmic parameters of TSVM are assumed to be chosen on a validation test
+            # set, we do not need to perform inner optimization here. So here we skip the inner optimization step.
+            # ------------------------------------------
+
+            total_loss = None
+            for task in self.modelpool.model_names:
+                with self.profile("data loading"):
+                    batch = next(self.get_shuffled_test_loader_iter(task))
+                    # NOTE: The labels are not allowed to be used during test-time adaptation
+                    images = batch[0].to(dtype=merge_dtype)
+                with self.profile("forward pass"):
+                    logits = self.compute_logits(module, images, task)
+                    loss = entropy_loss(logits)
+                    total_loss = loss if total_loss is None else total_loss + loss
+
+            with self.profile("compute grad"):
+                self.fabric.backward(total_loss)
+
+            with self.profile("optimizer step"):
+                optimizer.step()
+                optimizer.zero_grad()
+
+                if lr_scheduler is not None:
+                    lr_scheduler.step()
+
+            metrics.update({"train/loss": loss.item()})
+            self.fabric.log_dict(metrics, step=step_idx)
+            pbar.set_postfix(metrics)
+
+            if (step_idx + 1) % self.config.save_interval == 0:
+                with self.profiler.profile("save checkpoint"):
+                    save_dir = os.path.join(self.fabric.logger.log_dir, "checkpoints")
+                    if not os.path.exists(save_dir):
+                        os.makedirs(save_dir, exist_ok=True)
+                    save_path = os.path.join(save_dir, f"mask_steps_{step_idx}.pt")
+                    print(f"saving checkpoint to {save_path}")
+                    state = {"model": mask_model}
+                    self.fabric.save(save_path, state)
+
+                    # Create or update a symbolic link to the latest checkpoint
+                    if self.fabric.is_global_zero:
+                        symlink_path = os.path.join(save_dir, "latest_checkpoint.pt")
+                        if os.path.exists(symlink_path):
+                            os.remove(symlink_path)
+                        os.link(os.path.abspath(save_path), symlink_path)
+
+        self.print_profile_summary()
+
+    def run(self, modelpool: OpenCLIPVisionModelPool):
+        self.modelpool = modelpool
+        merge_dtype = parse_dtype(self.merge_dtype)
+
+        with self.profile("setup models"):
+            module, mask_model = self.setup_models()
+            self.setup_zero_shot_classification_head(freeze=True, dtype=merge_dtype)
+
+        if self.mask_checkpoint is None:
+            if not self.skip_training:
+                clear_cuda_cache()
+                self.train_mask(module, mask_model=mask_model)
+        else:
+            if self.fabric.is_global_zero:
+                print("loading mask from checkpoint", self.mask_checkpoint)
+            self.fabric.load(self.mask_checkpoint, {"model": mask_model})
+
+        with torch.no_grad():
+            clear_cuda_cache()
+            mask = mask_model.sample_mask(
+                mask_type=self.eval_mask_type, temperature=self.temperature
+            )
+            # rescale mask
+            for name, m in mask.items():
+                mask[name] = m / torch.mean(m)
+            model = module.merge_and_unload(mask)
+        return model.to(dtype=torch.float32)
```
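
The new algorithm is configured through the Hydra config added in this release (`fusion_bench_config/method/concrete_subspace/clip_concrete_tsvm.yaml`). For orientation, a programmatic sketch follows; it is untested, every value is a placeholder, and `modelpool` stands in for an already-configured `OpenCLIPVisionModelPool`:

```python
from omegaconf import DictConfig

from fusion_bench.method.concrete_subspace.clip_concrete_tsvm import (
    ConcreteTSVMForOpenCLIP,
)

algorithm = ConcreteTSVMForOpenCLIP(
    dataloader_kwargs=DictConfig({"batch_size": 16, "num_workers": 2}),
    optimizer=DictConfig({"_target_": "torch.optim.Adam", "lr": 1e-3}),
    lr_scheduler=None,
    max_steps=1000,            # test-time adaptation steps (placeholder)
    save_interval=100,         # checkpoint the mask every N steps
    initial_logits=0.0,        # initial value for the mask logits
    temperature=0.5,           # Concrete-distribution temperature
    eval_mask_type="continuous",
    mask_checkpoint=None,      # or a path to resume a trained mask
    merge_dtype="float32",
    clamp_weights=False,
    tie_weights=True,
    strict=False,
    skip_training=False,
    exclude_keys=None,         # TSVM: parameters excluded from SVD
    alpha=1.0,                 # TSVM scaling coefficient
)
merged_encoder = algorithm.run(modelpool)  # returns the merged image encoder
```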

`fusion_bench/method/task_singular_vector/TSVM.py`:

```diff
@@ -249,12 +249,13 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):
         # - SVD finds the principal components (most important directions)
         # - Task vectors are reconstructed using only the most significant components
         # - The reconstructed vectors are merged (summed) to create a unified task vector
-        new_merged_tv = TSVM_utils.compute_and_sum_svd_mem_reduction(
-            task_vectors,
-            exclude_keys=self.exclude_keys,  # Skip certain parameters from SVD
-            accelerator=accelerator,  # Use GPU if available
-            return_single_task_models=self.return_single_task_models,
-        )
+        with torch.no_grad():
+            new_merged_tv = TSVM_utils.compute_and_sum_svd_mem_reduction(
+                task_vectors,
+                exclude_keys=self.exclude_keys,  # Skip certain parameters from SVD
+                accelerator=accelerator,  # Use GPU if available
+                return_single_task_models=self.return_single_task_models,
+            )
 
         # Handle the case where individual transformed task vectors are also returned
         if self.return_single_task_models:
```
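
The context comments above summarize the merging recipe: run SVD on each task vector, reconstruct it from the most significant singular directions, then sum the reconstructions. A conceptual, self-contained illustration for a single 2-D weight follows; this is not the library's memory-reduced implementation, only the basic recipe it describes:

```python
import torch

def merge_task_vectors(task_vectors: list[torch.Tensor], k: int) -> torch.Tensor:
    """Sum rank-k SVD reconstructions of the task vectors for one weight matrix."""
    merged = torch.zeros_like(task_vectors[0])
    for tv in task_vectors:
        U, S, Vh = torch.linalg.svd(tv, full_matrices=False)
        merged += (U[:, :k] * S[:k]) @ Vh[:k, :]  # keep the top-k singular directions
    return merged

merged = merge_task_vectors([torch.randn(8, 8) for _ in range(3)], k=2)
```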

`fusion_bench/method/task_singular_vector/utils/TSVM_utils.py`:

```diff
@@ -311,7 +311,6 @@ def compute_and_sum_svd_mem_reduction_lossless_eigen(
 
 ###############
 #### TSV Merge Orthogonalization
-@torch.no_grad()
 def compute_and_sum_svd_mem_reduction(
     task_vectors: List[StateDictType],
     exclude_keys: Optional[List[str]] = None,
```
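
Taken together, these two hunks move `torch.no_grad()` from a decorator on `compute_and_sum_svd_mem_reduction` to the call site in `TaskSingularVectorMerging`, leaving gradient tracking under the caller's control. A standalone sketch of why that matters:

```python
import torch

def double(v: torch.Tensor) -> torch.Tensor:
    return 2 * v

x = torch.ones(3, requires_grad=True)

with torch.no_grad():   # call-site form: this particular call builds no graph
    y = double(x)
assert not y.requires_grad

z = double(x)           # the same function can still participate in autograd
assert z.requires_grad
```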
`fusion_bench/mixins/lightning_fabric.py`:

```diff
@@ -10,6 +10,7 @@ from lightning.fabric.loggers import TensorBoardLogger
 from lightning.fabric.utilities.rank_zero import rank_zero_only
 from omegaconf import DictConfig, OmegaConf
 
+from fusion_bench.constants import RuntimeConstants
 from fusion_bench.utils import import_object
 from fusion_bench.utils.instantiate_utils import instantiate
 
```

```diff
@@ -206,14 +207,7 @@ class LightningFabricMixin:
         Returns:
             bool: True if fast_dev_run is enabled, False otherwise.
         """
-
-            return True
-        elif hasattr(self, "_program") and self._program.config.get(
-            "fast_dev_run", False
-        ):
-            return True
-        else:
-            return False
+        return RuntimeConstants().debug
 
     def log(self, name: str, value: Any, step: Optional[int] = None):
         """
```

`fusion_bench/mixins/openclip_classification.py`:

```diff
@@ -1,11 +1,165 @@
+import functools
 import logging
+from typing import TYPE_CHECKING, Callable, Dict, Iterator, List, Literal, Optional
 
+import torch
+from omegaconf import DictConfig
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+
+from fusion_bench.dataset.clip_dataset import CLIPDataset
 from fusion_bench.mixins import LightningFabricMixin
-from fusion_bench.
+from fusion_bench.modelpool import OpenCLIPVisionModelPool
+from fusion_bench.models.open_clip import (
+    ClassificationHead,
+    ImageClassifier,
+    ImageEncoder,
+)
+from fusion_bench.utils.data import InfiniteDataLoader
 
 log = logging.getLogger(__name__)
 
 
 class OpenCLIPClassificationMixin(LightningFabricMixin):
+
     _train_processor = None
     _test_processor = None
+    dataloader_kwargs: DictConfig
+    modelpool: OpenCLIPVisionModelPool
+    zero_shot_heads: Dict[str, ClassificationHead] = {}
+
+    def _init_processor(self, encoder: Optional["ImageEncoder"] = None):
+        """
+        Initialize the CLIP processors for training and testing.
+        """
+        if encoder is None:
+            encoder: "ImageEncoder" = self.modelpool.load_pretrained_or_first_model()
+        self._train_processor = encoder.train_preprocess
+        self._test_processor = encoder.val_preprocess
+        return self._train_processor, self._test_processor
+
+    def get_clip_processor(self, stage: Literal["train", "test"]):
+        """
+        Get the CLIP processor, loading it from the model pool if necessary.
+
+        Returns:
+            CLIPProcessor: The CLIP processor for image and text preprocessing.
+
+        Raises:
+            AssertionError: If the model pool is not set.
+        """
+        if stage == "train":
+            if self._train_processor is None:
+                self._init_processor()
+            return self._train_processor
+        elif stage == "test":
+            if self._test_processor is None:
+                self._init_processor()
+            return self._test_processor
+        else:
+            raise ValueError(f"Invalid stage: {stage}")
+
+    def setup_zero_shot_classification_head(
+        self,
+        task_names: Optional[List[str]] = None,
+        freeze: bool = True,
+        dtype: Optional[torch.dtype] = None,
+    ):
+        # check task names consistency across processes
+        _task_names = self.fabric.broadcast(task_names, src=0)
+        if not self.fabric.is_global_zero and task_names != _task_names:
+            raise ValueError("The `task_names` must be the same across all processes.")
+
+        for task in tqdm(
+            self.modelpool.model_names if task_names is None else task_names,
+            "Setting up zero-shot classification head",
+            disable=not self.fabric.is_global_zero,
+        ):
+            head = self.modelpool.load_classification_head(task)
+            if freeze:
+                head.requires_grad_(False)
+            if dtype is not None:
+                head = head.to(dtype=dtype)
+            self.zero_shot_heads[task] = self.to_device(head)
+
+    def set_clip_processor(self, stage: Literal["train", "test"], processor: Callable):
+        """
+        Set the CLIP processor for a specific stage.
+
+        Args:
+            stage (Literal["train", "test"]): The stage for which to set the processor.
+            processor (Callable): The CLIP processor to set.
+        """
+        if stage == "train":
+            self._train_processor = processor
+        elif stage == "test":
+            self._test_processor = processor
+        else:
+            raise ValueError(f"Invalid stage: {stage}")
+
+    @functools.cache
+    def get_shuffled_test_loader_iter(
+        self,
+        task: str,
+        batch_size: Optional[int] = None,
+        num_workers: Optional[int] = None,
+        **loader_kwargs,
+    ) -> Iterator:
+        """
+        Get an iterator for a shuffled test DataLoader.
+
+        This method creates a DataLoader for the test dataset of the specified task,
+        with shuffling enabled. It allows for optional customization of batch size,
+        number of workers, and other DataLoader keyword arguments.
+
+        Args:
+            task (str): The task identifier for which the test dataset is to be loaded.
+            batch_size (Optional[int]): The batch size to use for the DataLoader. If None, the default batch size is used.
+            num_workers (Optional[int]): The number of worker processes to use for data loading. If None, the default number of workers is used.
+            **loader_kwargs: Additional keyword arguments to pass to the DataLoader.
+
+        Returns:
+            Iterator: An iterator over the shuffled test DataLoader.
+        """
+        # get dataloader kwargs
+        dataloader_kwargs = self.dataloader_kwargs.copy()
+        dataloader_kwargs["shuffle"] = True
+        if batch_size is not None:
+            dataloader_kwargs["batch_size"] = batch_size
+        if num_workers is not None:
+            dataloader_kwargs["num_workers"] = num_workers
+        dataloader_kwargs.update(loader_kwargs)
+
+        # get the test dataset
+        clip_dataset = CLIPDataset(
+            self.modelpool.load_test_dataset(task),
+            processor=self.get_clip_processor(stage="test"),
+        )
+        # create the dataloader
+        loader = DataLoader(clip_dataset, **dataloader_kwargs)
+        loader = self.fabric.setup_dataloaders(loader)
+        return iter(InfiniteDataLoader(loader))
+
+    def compute_logits(
+        self,
+        module: ImageClassifier,
+        images,
+        task: str,
+    ):
+        """
+        Compute the logits for a batch of images using the provided module and task.
+
+        Args:
+            module (ImageClassifier): The image classification module to use for computing logits.
+            images (torch.Tensor): The batch of images for which to compute logits.
+            task (str): The task identifier to specify which classification head to use.
+
+        Returns:
+            torch.Tensor: The computed logits for the input images.
+        """
+        if len(self.zero_shot_heads) == 0:
+            self.setup_zero_shot_classification_head()
+        task_head = self.zero_shot_heads[task]
+        features = module(images)
+        logits = task_head(features)
+        return logits
```
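
Note that `get_shuffled_test_loader_iter` is wrapped in `@functools.cache`: repeated calls with the same arguments return the same infinite iterator instead of rebuilding the DataLoader, which is what lets the test-time adaptation loop in `clip_concrete_tsvm.py` keep drawing fresh batches step after step. A sketch of that behavior, with `mixin` a hypothetical instance:

```python
it_a = mixin.get_shuffled_test_loader_iter("task_a")
it_b = mixin.get_shuffled_test_loader_iter("task_a")
assert it_a is it_b  # @functools.cache returns the same iterator object

batch = next(it_a)   # successive next() calls advance one shared, endless stream
```

One trade-off of caching a bound method this way is that the cache keeps references to `self` and the underlying DataLoader alive for the life of the process.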

`fusion_bench/modelpool/base_pool.py`:

```diff
@@ -7,6 +7,7 @@ from omegaconf import DictConfig, OmegaConf, UnsupportedValueType
 from torch import nn
 from torch.utils.data import Dataset
 
+from fusion_bench import TorchModelType
 from fusion_bench.mixins import BaseYAMLSerializable, HydraConfigMixin
 from fusion_bench.utils import (
     ValidationError,
```

`fusion_bench/modelpool/openclip_vision/modelpool.py`:

```diff
@@ -1,7 +1,7 @@
 import logging
 import pickle
 import sys
-from typing import Callable, Optional, Union, cast
+from typing import Callable, Optional, Union, cast, override
 
 import torch
 from datasets import load_dataset
```
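
`typing.override` only exists on Python 3.12 and newer (PEP 698), so this import implies a floor on the supported interpreter version. If broader compatibility were needed, the usual fallback pattern (an assumption, not what the package does) is:

```python
import sys

if sys.version_info >= (3, 12):
    from typing import override
else:
    from typing_extensions import override  # backport of the same decorator
```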

```diff
@@ -41,8 +41,8 @@ def _check_and_redirect_open_clip_modeling():
     )
 
     try:
-        import src
-        import src.modeling
+        import src  # type: ignore
+        import src.modeling  # type: ignore
     except ImportError:
         if "src" not in sys.modules:
             # redirect the import of `src` to `fusion_bench.models.open_clip`
```

```diff
@@ -114,6 +114,7 @@ class OpenCLIPVisionModelPool(BaseModelPool):
         self._test_processor = encoder.val_preprocess
         return self._test_processor
 
+    @override
     def load_model(
         self, model_name_or_config: Union[str, DictConfig], *args, **kwargs
     ) -> ImageEncoder:
```

```diff
@@ -210,6 +211,8 @@ class OpenCLIPVisionModelPool(BaseModelPool):
         - A string, which is the path to the model checkpoint in pickle format. Load directly using `torch.load`.
         - Default, load the model using `instantiate` from hydra.
         """
+        if self._classification_heads is None:
+            raise ValueError("No classification heads are defined in the model pool.")
         if (
             isinstance(model_name_or_config, str)
             and model_name_or_config in self._classification_heads
```

```diff
@@ -222,6 +225,8 @@ class OpenCLIPVisionModelPool(BaseModelPool):
         return head
 
     def load_train_dataset(self, dataset_name: str, *args, **kwargs):
+        if self._train_datasets is None:
+            raise ValueError("No train datasets are defined in the model pool.")
         dataset_config = self._train_datasets[dataset_name]
         if isinstance(dataset_config, str):
             log.info(
```

```diff
@@ -233,6 +238,8 @@ class OpenCLIPVisionModelPool(BaseModelPool):
         return dataset
 
     def load_val_dataset(self, dataset_name: str, *args, **kwargs):
+        if self._val_datasets is None:
+            raise ValueError("No val datasets are defined in the model pool.")
         dataset_config = self._val_datasets[dataset_name]
         if isinstance(dataset_config, str):
             log.info(
```

```diff
@@ -244,6 +251,8 @@ class OpenCLIPVisionModelPool(BaseModelPool):
         return dataset
 
     def load_test_dataset(self, dataset_name: str, *args, **kwargs):
+        if self._test_datasets is None:
+            raise ValueError("No test datasets are defined in the model pool.")
         dataset_config = self._test_datasets[dataset_name]
         if isinstance(dataset_config, str):
             log.info(
```
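
The four guards added in this file turn a lookup on `None` (which would surface as `TypeError: 'NoneType' object is not subscriptable`) into an explicit `ValueError` with an actionable message. A sketch of the new failure mode, with `pool` a hypothetical `OpenCLIPVisionModelPool` configured without test datasets:

```python
try:
    pool.load_test_dataset("mnist")
except ValueError as err:
    print(err)  # -> No test datasets are defined in the model pool.
```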