fusion-bench 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/compat/method/__init__.py +3 -1
- fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
- fusion_bench/constants/clip_vision.py +22 -0
- fusion_bench/dataset/clip_dataset.py +10 -2
- fusion_bench/dataset/gsm8k.py +2 -2
- fusion_bench/method/__init__.py +12 -2
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
- fusion_bench/method/doge_ta/__init__.py +2 -0
- fusion_bench/method/{DOGE_TA → doge_ta}/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/{DOGE_TA/DOGE_TA.py → doge_ta/doge_ta.py} +1 -1
- fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
- fusion_bench/method/gossip/__init__.py +3 -0
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
- fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
- fusion_bench/method/gossip/entropy_loss.py +25 -0
- fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
- fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
- fusion_bench/method/gossip/min_norm_solvers.py +227 -0
- fusion_bench/method/gossip/task_wise_gossip.py +265 -0
- fusion_bench/method/gossip/utils.py +74 -0
- fusion_bench/method/isotropic_merging/__init__.py +1 -1
- fusion_bench/method/opcm/opcm.py +102 -84
- fusion_bench/method/opcm/task_arithmetic.py +35 -21
- fusion_bench/method/opcm/ties_merging.py +71 -52
- fusion_bench/method/pwe_moe/module.py +1 -1
- fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
- fusion_bench/method/regmean/regmean.py +25 -17
- fusion_bench/method/smile_upscaling/__init__.py +1 -1
- fusion_bench/method/smile_upscaling/smile_upscaling.py +13 -10
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
- fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
- fusion_bench/method/ties_merging/ties_merging.py +36 -31
- fusion_bench/method/we_moe/we_moe.py +14 -15
- fusion_bench/mixins/__init__.py +6 -3
- fusion_bench/mixins/hydra_config.py +49 -0
- fusion_bench/mixins/openclip_classification.py +11 -0
- fusion_bench/mixins/simple_profiler.py +4 -2
- fusion_bench/modelpool/__init__.py +3 -1
- fusion_bench/modelpool/base_pool.py +2 -2
- fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
- fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
- fusion_bench/models/open_clip/__init__.py +6 -0
- fusion_bench/models/open_clip/modeling.py +176 -0
- fusion_bench/models/open_clip/utils.py +311 -0
- fusion_bench/models/open_clip/variables_and_paths.py +56 -0
- fusion_bench/models/parameter_dict.py +54 -13
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -46
- fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py +4 -119
- fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
- fusion_bench/taskpool/__init__.py +5 -3
- fusion_bench/taskpool/clip_vision/__init__.py +1 -0
- fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
- fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
- fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
- fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
- fusion_bench/taskpool/gpt2_text_classification.py +30 -1
- fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
- fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
- fusion_bench/utils/data.py +12 -0
- fusion_bench/utils/devices.py +14 -0
- fusion_bench/utils/instantiate.py +12 -0
- fusion_bench/utils/misc.py +9 -2
- fusion_bench/utils/packages.py +14 -0
- fusion_bench/utils/parameters.py +1 -1
- fusion_bench/utils/tensorboard.py +1 -1
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/METADATA +15 -2
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/RECORD +198 -158
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL +1 -1
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
- fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
- fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
- fusion_bench_config/fabric/auto.yaml +0 -1
- fusion_bench_config/fabric/llama_ddp.yaml +0 -1
- fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric_model_fusion.yaml +0 -1
- fusion_bench_config/llama_full_finetune.yaml +0 -2
- fusion_bench_config/llama_model_fusion.yaml +0 -2
- fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
- fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
- fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
- fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
- fusion_bench_config/method/adamerging.yaml +2 -2
- fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
- fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
- fusion_bench_config/method/dare/simple_average.yaml +0 -1
- fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
- fusion_bench_config/method/dare/ties_merging.yaml +0 -2
- fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
- fusion_bench_config/method/{DOGE_TA/DOGE_TA.yaml → doge_ta/doge_ta.yaml} +1 -1
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
- fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
- fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
- fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
- fusion_bench_config/method/linear/llama_expo.yaml +0 -3
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
- fusion_bench_config/method/linear/weighted_average.yaml +0 -1
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
- fusion_bench_config/method/model_recombination.yaml +0 -1
- fusion_bench_config/method/opcm/opcm.yaml +0 -1
- fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
- fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
- fusion_bench_config/method/opcm/weight_average.yaml +0 -1
- fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
- fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
- fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
- fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
- fusion_bench_config/method/slerp/slerp.yaml +0 -2
- fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
- fusion_bench_config/method/task_arithmetic.yaml +1 -1
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
- fusion_bench_config/method/ties_merging.yaml +1 -1
- fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
- fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
- fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +8 -10
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +66 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
- fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
- fusion_bench_config/nyuv2_config.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
- fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
- fusion_bench/method/DOGE_TA/__init__.py +0 -2
- /fusion_bench/method/{DOGE_TA → doge_ta}/layer_wise_adamerging.py +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info/licenses}/LICENSE +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/top_level.txt +0 -0
fusion_bench/compat/method/__init__.py CHANGED
@@ -20,9 +20,11 @@ class AlgorithmFactory:
     # model merging methods
     "clip_task_wise_adamerging": ".adamerging.clip_task_wise_adamerging.CLIPTaskWiseAdaMergingAlgorithm",
     "clip_layer_wise_adamerging": ".adamerging.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
-    "clip_layer_wise_adamerging_doge_ta": ".
+    "clip_layer_wise_adamerging_doge_ta": ".doge_ta.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
     "singular_projection_merging": "fusion_bench.method.smile_upscaling.singular_projection_merging.SingularProjectionMergingAlgorithm",
     "clip_layer_wise_adamerging_surgery": ".surgery.clip_layer_wise_adamerging_surgery.CLIPLayerWiseAdaMergingSurgeryAlgorithm",
+    "clip_task_wise_gossip": ".gossip.clip_task_wise_gossip.CLIPTaskWiseGossipAlgorithm",
+    "clip_layer_wise_gossip": ".gossip.clip_layer_wise_gossip.CLIPLayerWiseGossipAlgorithm",
     # plug-and-play model merging methods
     "clip_concrete_task_arithmetic": ".concrete_subspace.clip_concrete_task_arithmetic.ConcreteTaskArithmeticAlgorithmForCLIP",
     "clip_concrete_task_wise_adamerging": ".concrete_subspace.clip_concrete_adamerging.ConcreteTaskWiseAdaMergingForCLIP",
fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py CHANGED
@@ -148,12 +148,13 @@ class FlanT5GLUETextGenerationTaskPool(LightningFabricMixin, TaskPool):
         else:
             raise ValueError(f"Unknown task {task_config.name}")
 
-    def evaluate(self, model: T5ForConditionalGeneration):
+    def evaluate(self, model: T5ForConditionalGeneration, name: str = None):
         """
         Evaluate the model on the FlanT5 GLUE text generation tasks.
 
         Args:
             model (T5ForConditionalGeneration): The model to evaluate.
+            name (str, optional): The name of the model. Defaults to None. This is used to identify the model in the report.
 
         Returns:
             dict: A dictionary containing the evaluation results for each task.
@@ -169,6 +170,8 @@ class FlanT5GLUETextGenerationTaskPool(LightningFabricMixin, TaskPool):
             "all_params": all_params,
             "trainable_percentage": training_params / all_params,
         }
+        if name is not None:
+            report["model_info"]["name"] = name
         model = self.fabric.setup(model)
         report.update(super().evaluate(model))
         log.info(f"evaluation report: {report}")
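Not part of the diff itself: a minimal sketch of how the new `name` argument could be used when evaluating a merged model. The `taskpool` and `merged_model` objects are assumed to have been built elsewhere (e.g. via the usual Hydra configs); only the `name` handling below is taken from the change above.

```python
# Illustrative only: `taskpool` is an already-instantiated
# FlanT5GLUETextGenerationTaskPool and `merged_model` an already-merged
# T5ForConditionalGeneration; neither is constructed in this sketch.
report = taskpool.evaluate(merged_model, name="ties_merging")

# The identifier is carried through in the report's model_info section.
assert report["model_info"]["name"] == "ties_merging"
```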
fusion_bench/constants/clip_vision.py
@@ -0,0 +1,22 @@
+# Constants for CLIP Vision Model Merging
+TASK_NAMES_TA8 = [
+    "sun397",
+    "stanford-cars",
+    "resisc45",
+    "eurosat",
+    "svhn",
+    "gtsrb",
+    "mnist",
+    "dtd",
+]
+
+TASK_NAMES_TA8_CAP = [
+    "SUN397",
+    "Cars",
+    "RESISC45",
+    "EuroSAT",
+    "SVHN",
+    "GTSRB",
+    "MNIST",
+    "DTD",
+]
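Not from the diff: one way these two parallel lists might be consumed is to zip the lowercase task keys to their capitalized display names. The import path below assumes `fusion_bench.constants` is an importable package; only the two list names come from the new file.

```python
# Illustrative sketch: build a lookup from the lowercase task keys to the
# capitalized names defined in fusion_bench/constants/clip_vision.py.
from fusion_bench.constants.clip_vision import TASK_NAMES_TA8, TASK_NAMES_TA8_CAP

TASK_DISPLAY_NAME = dict(zip(TASK_NAMES_TA8, TASK_NAMES_TA8_CAP))
assert TASK_DISPLAY_NAME["stanford-cars"] == "Cars"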
fusion_bench/dataset/clip_dataset.py CHANGED
@@ -2,11 +2,13 @@
 This module provides a class to convert a dataset whose object is a list of dictionaries with keys "image" and "label" to a dataset whose object is a tuple of tensors (inputs, label) for CLIP models.
 """
 
-from typing import Optional
+from typing import Optional, Tuple
 
 import torch
 from transformers import CLIPProcessor, ProcessorMixin
 
+__all__ = ["CLIPDataset"]
+
 
 class CLIPDataset(torch.utils.data.Dataset):
     """
@@ -34,7 +36,7 @@ class CLIPDataset(torch.utils.data.Dataset):
         """Returns the number of items in the dataset."""
         return len(self.dataset)
 
-    def __getitem__(self, idx: int):
+    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
         """
         Retrieves and processes an item from the dataset.
 
@@ -62,6 +64,12 @@ class CLIPDataset(torch.utils.data.Dataset):
                 inputs = self.processor(images=[image], return_tensors="pt")[
                     "pixel_values"
                 ][0]
+            elif callable(self.processor):
+                inputs = self.processor(image)
+            else:
+                raise ValueError(
+                    "The processor should be a CLIPProcessor or a callable function"
+                )
         else:
             # if processor is None, return the raw image directly
             inputs = image
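A small sketch of what the new callable branch permits: passing a plain transform function instead of a `CLIPProcessor`. The toy dataset and the torchvision transform are illustrative assumptions; the list-of-dicts format follows the module docstring quoted above.

```python
# Illustrative sketch: CLIPDataset now also accepts any callable as the
# processor (e.g. a torchvision transform), not only a CLIPProcessor.
from PIL import Image
from torchvision import transforms

from fusion_bench.dataset import CLIPDataset

# A toy dataset in the format described by the module docstring:
# a sequence of dicts with "image" and "label" keys.
raw_dataset = [{"image": Image.new("RGB", (32, 32)), "label": 0}]

preprocess = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)
clip_dataset = CLIPDataset(raw_dataset, preprocess)
pixel_values, label = clip_dataset[0]  # Tuple[torch.Tensor, int]
```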
fusion_bench/dataset/gsm8k.py CHANGED
@@ -6,7 +6,7 @@ from datasets import load_dataset
 
 
 def load_gsm8k_question_label_data(
-    dataset_name: Literal["train", "test", "train_socratic", "test_socratic"]
+    dataset_name: Literal["train", "test", "train_socratic", "test_socratic"],
 ):
     R"""
     Load the GSM8K dataset and extract questions and labels.
@@ -45,7 +45,7 @@ def load_gsm8k_question_label_data(
 
 
 def load_gsm8k_question_label_dataset(
-    dataset_name: Literal["train", "test", "train_socratic", "test_socratic"]
+    dataset_name: Literal["train", "test", "train_socratic", "test_socratic"],
 ):
     """
     Load the GSM8K dataset and return it as a Hugging Face Dataset object.
fusion_bench/method/__init__.py CHANGED
@@ -53,7 +53,7 @@ _import_structure = {
         "PWEMoExactParetoOptimalForCLIP",
     ],
     "ada_svd": ["AdaSVDMergingForCLIPVisionModel"],
-    "
+    "doge_ta": ["DOGE_TA_Algorithm"],
     "task_singular_vector": ["TaskSingularVectorMerging"],
     "isotropic_merging": [
         "ISO_C_Merge",  # alias
@@ -62,6 +62,11 @@ _import_structure = {
         "IsotropicMergingInCommonSubspace",
     ],
     "opcm": ["OPCMForCLIP"],
+    "gossip": [
+        "CLIPLayerWiseGossipAlgorithm",
+        "CLIPTaskWiseGossipAlgorithm",
+        "FlanT5LayerWiseGossipAlgorithm",
+    ],
     # plug-and-play model merging methods
     "concrete_subspace": [
         "ConcreteTaskArithmeticAlgorithmForCLIP",
@@ -128,7 +133,7 @@ if TYPE_CHECKING:
     from .dare import DareSimpleAverage, DareTaskArithmetic, DareTiesMerging
     from .dawe import DataAdaptiveWeightEnsemblingForCLIP
     from .depth_upscaling import DepthUpscalingAlgorithm, DepthUpscalingForLlama
-    from .
+    from .doge_ta import DOGE_TA_Algorithm
     from .dummy import DummyAlgorithm
     from .ensemble import (
         MaxModelPredictorAlgorithm,
@@ -136,6 +141,11 @@ if TYPE_CHECKING:
         WeightedEnsembleAlgorithm,
     )
     from .fisher_merging import FisherMergingForCLIPVisionModel
+    from .gossip import (
+        CLIPLayerWiseGossipAlgorithm,
+        CLIPTaskWiseGossipAlgorithm,
+        FlanT5LayerWiseGossipAlgorithm,
+    )
     from .isotropic_merging import (
         ISO_C_Merge,
         ISO_CTS_Merge,
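With the new entries registered in `_import_structure` and mirrored under `TYPE_CHECKING`, the gossip and doge_ta algorithms become importable from the subpackage root. A minimal illustration using only names that appear in the diff:

```python
# Illustrative: the lazy-import table now resolves these names directly
# from fusion_bench.method without importing every submodule eagerly.
from fusion_bench.method import CLIPTaskWiseGossipAlgorithm, DOGE_TA_Algorithm

print(CLIPTaskWiseGossipAlgorithm.__name__, DOGE_TA_Algorithm.__name__)
```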
@@ -9,7 +9,7 @@ fusion_bench \
     modelpool=clip-vit-base-patch32_TA8 \
     taskpool=clip-vit-classification_TA8 \
     fabric.loggers.root_dir=outputs/logs/ViT-B-32 \
-    fabric.loggers.name=
+    fabric.loggers.name=clip_layer_wise_adamerging_adamerging
 ```
 """
 
fusion_bench/method/adamerging/clip_task_wise_adamerging.py CHANGED
@@ -13,41 +13,13 @@ from fusion_bench.modelpool import CLIPVisionModelPool
 from fusion_bench.models.hf_clip import HFCLIPClassifier
 from fusion_bench.tasks.clip_classification import get_classnames_and_templates
 from fusion_bench.utils import timeit_context
+from fusion_bench.utils.data import InfiniteDataLoader
 
 from .task_wise_adamerging import TaskWiseAdaMergingAlgorithm
 
 log = logging.getLogger(__name__)
 
 
-class InfiniteDataLoader:
-    """
-    A wrapper class for DataLoader to create an infinite data loader.
-    This is useful in case we are only interested in the number of steps and not the number of epochs.
-
-    This class wraps a DataLoader and provides an iterator that resets
-    when the end of the dataset is reached, creating an infinite loop.
-
-    Attributes:
-        data_loader (DataLoader): The DataLoader to wrap.
-        data_iter (iterator): An iterator over the DataLoader.
-    """
-
-    def __init__(self, data_loader):
-        self.data_loader = data_loader
-        self.data_iter = iter(data_loader)
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        try:
-            data = next(self.data_iter)
-        except StopIteration:
-            self.data_iter = iter(self.data_loader)  # Reset the data loader
-            data = next(self.data_iter)
-        return data
-
-
 class CLIPTaskWiseAdaMergingAlgorithm(TaskWiseAdaMergingAlgorithm):
     """
     A class for task-wise adaptive merging of CLIP models.
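The locally defined `InfiniteDataLoader` helper is removed here and imported from `fusion_bench.utils.data` instead. A short, self-contained sketch of the wrap-and-iterate pattern it supports (the toy dataset below is illustrative):

```python
# Illustrative sketch: iterate a fixed number of adaptation steps regardless
# of dataset length by wrapping the DataLoader in InfiniteDataLoader.
import torch
from torch.utils.data import DataLoader, TensorDataset

from fusion_bench.utils.data import InfiniteDataLoader

toy = TensorDataset(torch.randn(10, 3), torch.randint(0, 2, (10,)))
loader_iter = iter(InfiniteDataLoader(DataLoader(toy, batch_size=4, shuffle=True)))

for step in range(100):  # more steps than one epoch; the iterator never stops
    inputs, labels = next(loader_iter)
```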
@@ -9,7 +9,7 @@ fusion_bench \
     modelpool=clip-vit-base-patch32_TA8 \
     taskpool=clip-vit-classification_TA8 \
     fabric.loggers.root_dir=outputs/logs/ViT-B-32 \
-    fabric.loggers.name=
+    fabric.loggers.name=clip_layer_wise_adamerging_adamerging
 ```
 """
 
fusion_bench/method/fisher_merging/fisher_merging.py CHANGED
@@ -12,6 +12,7 @@ from torch import Tensor, nn
 from tqdm.autonotebook import tqdm
 
 from fusion_bench.method import BaseAlgorithm
+from fusion_bench.mixins import SimpleProfilerMixin
 from fusion_bench.modelpool import BaseModelPool
 
 log = logging.getLogger(__name__)
@@ -352,7 +353,7 @@ def filter_state_dict(
     return filtered_state_dict
 
 
-class FisherMergingAlgorithm(BaseAlgorithm):
+class FisherMergingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
     """
     Implements the Fisher Merging Algorithm.
 
@@ -432,25 +433,36 @@ class FisherMergingAlgorithm(BaseAlgorithm):
             for param_name in param_names_to_merge:
                 models_to_merge_param_dict[param_name].append(param_dict[param_name])
 
-
-
-
-
-
-
+            with (
+                self.profile("merging models"),
+                self.profile("computing fisher weights"),
+            ):
+                model_to_merge_fisher_weights = self.get_fisher_weights(
+                    model_name=name,
+                    model=model,
+                    train_dataset=modelpool.load_train_dataset(name),
+                    param_names_to_merge=param_names_to_merge,
+                )
 
-
+            models_to_merge_fisher_weights_list.append(
+                model_to_merge_fisher_weights
+            )
 
-
-
-
-
-
-
-
+        with self.profile("merging models"):
+            merged_params = merging_with_fisher_weights(
+                models_to_merge_param_dict=models_to_merge_param_dict,
+                models_to_merge_fisher_weights_list=models_to_merge_fisher_weights_list,
+                fisher_scaling_coefficients=torch.ones(len(modelpool)) / len(modelpool),
+                normalize_fisher_weight=self.config.get(
+                    "normalize_fisher_weight", True
+                ),
+                minimal_fisher_weight=self.config.get("minimal_fisher_weight", 1e-6),
+            )
+
+            merged_model = modelpool.load_model("_pretrained_")
+            merged_model.load_state_dict(merged_params, strict=False)
 
-
-            merged_model.load_state_dict(merged_params, strict=False)
+        self.print_profile_summary()
         return merged_model
 
     def get_fisher_weights(
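The pattern adopted above comes from `SimpleProfilerMixin`; in isolation it looks roughly like the sketch below. The class name and the dummy work are illustrative; only `profile(...)` and `print_profile_summary()` are taken from the diff, and whether the mixin needs any further setup in a real algorithm is outside this change.

```python
# Illustrative sketch of the SimpleProfilerMixin usage pattern adopted by
# FisherMergingAlgorithm: wrap stages in `self.profile(...)` context blocks
# and emit a timing summary at the end.
import time

from fusion_bench.mixins import SimpleProfilerMixin


class ToyMerger(SimpleProfilerMixin):
    def run(self):
        with self.profile("computing fisher weights"):
            time.sleep(0.1)  # stand-in for per-model Fisher estimation
        with self.profile("merging models"):
            time.sleep(0.05)  # stand-in for the weighted parameter merge
        self.print_profile_summary()


ToyMerger().run()
```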
fusion_bench/method/gossip/clip_layer_wise_gossip.py
@@ -0,0 +1,43 @@
+"""
+Example Usage:
+
+```bash
+fusion_bench \
+    method=adamerging \
+        method.name=clip_layer_wise_adamerging \
+        method.save_merging_weights=merging_weights.pt \
+    modelpool=clip-vit-base-patch32_TA8 \
+    taskpool=clip-vit-classification_TA8 \
+    fabric_logger.root_dir=outputs/logs/ViT-B-32 \
+    fabric_logger.name=clip_layer_wise_adamerging_adam
+```
+"""
+
+import functools
+import logging
+
+from fusion_bench.mixins import CLIPClassificationMixin
+
+from .layer_wise_gossip import LayerWiseGossipAlgorithm
+
+log = logging.getLogger(__name__)
+
+
+class CLIPLayerWiseGossipAlgorithm(
+    CLIPClassificationMixin,
+    LayerWiseGossipAlgorithm,
+):
+    def on_test_time_adaptation_start(self):
+        """
+        Here we load the CLIP processor and construct the zero-shot classification head for each task.
+        """
+        if self.whether_setup_zero_shot_classification_head == False:
+            self.setup_zero_shot_classification_head()
+
+    @functools.cache
+    def get_shuffled_test_loader_iter(self, task: str):
+        return super().get_shuffled_test_loader_iter(
+            task,
+            batch_size=self.config.batch_size,
+            num_workers=self.config.num_workers,
+        )
fusion_bench/method/gossip/clip_task_wise_gossip.py
@@ -0,0 +1,190 @@
+import functools
+import logging
+import os
+
+import torch
+from omegaconf import DictConfig
+from torch import Tensor
+from torch.utils.data import DataLoader
+from transformers import CLIPModel, CLIPProcessor
+
+from fusion_bench.dataset import CLIPDataset
+from fusion_bench.modelpool import CLIPVisionModelPool
+from fusion_bench.models.hf_clip import HFCLIPClassifier
+from fusion_bench.tasks.clip_classification import get_classnames_and_templates
+from fusion_bench.utils import timeit_context
+
+from .task_wise_gossip import TaskWiseGossipAlgorithm
+
+log = logging.getLogger(__name__)
+
+
+class InfiniteDataLoader:
+    """
+    A wrapper class for DataLoader to create an infinite data loader.
+    This is useful in case we are only interested in the number of steps and not the number of epochs.
+
+    This class wraps a DataLoader and provides an iterator that resets
+    when the end of the dataset is reached, creating an infinite loop.
+
+    Attributes:
+        data_loader (DataLoader): The DataLoader to wrap.
+        data_iter (iterator): An iterator over the DataLoader.
+    """
+
+    def __init__(self, data_loader):
+        self.data_loader = data_loader
+        self.data_iter = iter(data_loader)
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        try:
+            data = next(self.data_iter)
+        except StopIteration:
+            self.data_iter = iter(self.data_loader)  # Reset the data loader
+            data = next(self.data_iter)
+        return data
+
+
+class CLIPTaskWiseGossipAlgorithm(TaskWiseGossipAlgorithm):
+    """
+    A class for task-wise adaptive merging of CLIP models.
+
+    This class extends the TaskWiseGossipAlgorithm to provide specific
+    functionality for CLIP models, including loading datasets, constructing
+    zero-shot classification heads, and computing logits.
+
+    Attributes:
+        modelpool (CLIPVisionModelPool): The model pool containing CLIP models.
+        _clip_processor (CLIPProcessor): The CLIP processor for preparing inputs.
+        zeroshot_weights (dict): A dictionary to store zero-shot weights for each task.
+    """
+
+    modelpool: CLIPVisionModelPool = None
+    _clip_processor: CLIPProcessor = None
+    zeroshot_weights = {}
+
+    def __init__(self, algorithm_config: DictConfig):
+        super().__init__(algorithm_config)
+
+    @functools.cache
+    def get_test_dataset(self, task: str):
+        """
+        Load the test dataset for the task.
+        This method is cached, so the dataset is loaded only once.
+
+        Args:
+            task (str): The name of the task.
+
+        Returns:
+            CLIPDataset: The test dataset for the task.
+        """
+        log.info(f"Loading test dataset: {task}")
+        dataset = self.modelpool.load_test_dataset(task)
+        dataset = CLIPDataset(dataset, self._clip_processor)
+        return dataset
+
+    @functools.cache
+    def get_shuffled_test_loader_iter(self, task: str):
+        """
+        Get an iterator over the shuffled test DataLoader for the task.
+
+        Args:
+            task (str): The name of the task.
+
+        Returns:
+            iterator: An iterator over the shuffled test DataLoader.
+        """
+        loader = DataLoader(
+            self.get_test_dataset(task),
+            batch_size=self.config.batch_size,
+            shuffle=True,
+            num_workers=self.config.num_workers,
+            pin_memory=True,
+        )
+        if self._fabric is not None:
+            loader = self._fabric.setup_dataloaders(loader)
+        return iter(InfiniteDataLoader(loader))
+
+    def on_test_time_adaptation_start(self):
+        """
+        Prepare for test-time adaptation.
+
+        This method loads the CLIP processor and constructs the zero-shot
+        classification head for each task.
+        """
+        if self._clip_processor is not None and self.zeroshot_weights is not None:
+            return  # this can be reused in Gossip
+
+        clip_model_config = self.modelpool.get_model_config("_pretrained_")
+        pretrained_path = (
+            clip_model_config.pretrained_model_name_or_path
+            if hasattr(clip_model_config, "pretrained_model_name_or_path")
+            else clip_model_config.path
+        )
+
+        with timeit_context("Loading CLIP processor and pretrained CLIP model."):
+            self._clip_processor = CLIPProcessor.from_pretrained(pretrained_path)
+            clip_model: CLIPModel = CLIPModel.from_pretrained(pretrained_path)
+
+            clip_classifier = HFCLIPClassifier(clip_model, self._clip_processor)
+            self.visual_projection = clip_model.visual_projection.requires_grad_(False)
+            self.logit_scale_exp = clip_model.logit_scale.exp()
+            if self._fabric is not None:
+                self.visual_projection = self._fabric.to_device(self.visual_projection)
+                self.logit_scale_exp = self._fabric.to_device(self.logit_scale_exp)
+
+        for task in self.modelpool.model_names:
+            cache_file = os.path.join(
+                self.config.cache_dir,
+                f"{os.path.basename(pretrained_path)}_{task}_zeroshot_weights.pt",
+            )
+            if os.path.exists(cache_file):
+                log.info(f"Loading cached zeroshot weights for task: {task}")
+                zeroshot_weights = torch.load(cache_file, map_location="cpu")
+            else:
+                log.info(f"Construct zero shot classification head for task: {task}")
+                classnames, templates = get_classnames_and_templates(task)
+                clip_classifier.set_classification_task(classnames, templates)
+                zeroshot_weights = clip_classifier.zeroshot_weights
+                log.info(f"save zeroshot weights to {cache_file}")
+                torch.save(zeroshot_weights, cache_file)
+            self.zeroshot_weights[task] = zeroshot_weights
+            if self._fabric is not None:
+                self.zeroshot_weights[task] = self._fabric.to_device(
+                    self.zeroshot_weights[task]
+                )
+
+    def compute_logits(self, module, batch, task: str) -> Tensor:
+        """
+        Compute the logits for the given batch and task.
+
+        This method computes the image embeddings, normalizes them, and calculates
+        the cosine similarity with the text embeddings to produce classification logits.
+
+        Args:
+            module (nn.Module): The model module.
+            batch (tuple): A batch of input data.
+            task (str): The name of the task.
+
+        Returns:
+            Tensor: The classification logits for the batch.
+        """
+        images, _ = batch
+        text_embeds = self.zeroshot_weights[task]
+
+        image_embeds = module(images)[1]
+        image_embeds = self.visual_projection(image_embeds)
+
+        # normalize embeddings
+        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
+
+        # cosine similarity
+        logits_per_text = (
+            torch.matmul(text_embeds, image_embeds.t()) * self.logit_scale_exp
+        )
+        logits_per_image = logits_per_text.t()
+
+        return logits_per_image
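The logit computation in `compute_logits` above is plain CLIP-style scaled cosine similarity; a standalone sketch of the same math with made-up shapes and scale value (the zero-shot text embeddings are assumed to be pre-normalized, as in the class above):

```python
# Illustrative sketch of the logit computation: cosine similarity between
# normalized image embeddings and the per-task zero-shot text embeddings,
# scaled by exp(logit_scale).
import torch

batch_size, embed_dim, num_classes = 8, 512, 10
image_embeds = torch.randn(batch_size, embed_dim)
text_embeds = torch.randn(num_classes, embed_dim)  # zero-shot classification head
logit_scale_exp = torch.tensor(100.0)  # roughly exp(logit_scale) for CLIP

image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
logits_per_image = (text_embeds @ image_embeds.t() * logit_scale_exp).t()
print(logits_per_image.shape)  # torch.Size([8, 10])
```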
fusion_bench/method/gossip/entropy_loss.py
@@ -0,0 +1,25 @@
+import torch
+from torch import Tensor
+
+
+def entropy_loss(logits: Tensor, eps: float = 1e-8) -> Tensor:
+    """
+    Compute the entropy loss of a set of logits.
+
+    Args:
+        logits (Tensor): The logits to compute the entropy loss of.
+        eps (float): A small value to avoid log(0). Default is 1e-8.
+
+    Returns:
+        Tensor: The entropy loss of the logits.
+    """
+    # Ensure the logits tensor has 2 dimensions
+    assert (
+        logits.dim() == 2
+    ), f"Expected logits to have 2 dimensions, found {logits.dim()}, {logits.size()=}"
+
+    # Compute the softmax probabilities
+    probs = torch.softmax(logits, dim=-1)
+
+    # Compute the entropy loss
+    return -torch.sum(probs * torch.log(probs + eps), dim=-1).mean()
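A quick, self-contained check of the new `entropy_loss` helper (the input values are illustrative):

```python
# Illustrative usage of the entropy_loss helper added in
# fusion_bench/method/gossip/entropy_loss.py: lower values mean more
# confident (peaked) predictions, which test-time adaptation minimizes.
import torch

from fusion_bench.method.gossip.entropy_loss import entropy_loss

confident = torch.tensor([[10.0, -10.0], [-10.0, 10.0]])
uncertain = torch.zeros(2, 2)
print(entropy_loss(confident))  # close to 0
print(entropy_loss(uncertain))  # close to log(2) ~ 0.693
```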