fusion-bench 0.2.11__py3-none-any.whl → 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/compat/method/__init__.py +3 -1
- fusion_bench/compat/taskpool/flan_t5_glue_text_generation.py +4 -1
- fusion_bench/constants/clip_vision.py +22 -0
- fusion_bench/dataset/clip_dataset.py +10 -2
- fusion_bench/dataset/gsm8k.py +2 -2
- fusion_bench/method/__init__.py +12 -2
- fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/adamerging/clip_task_wise_adamerging.py +1 -29
- fusion_bench/method/doge_ta/__init__.py +2 -0
- fusion_bench/method/{DOGE_TA → doge_ta}/clip_layer_wise_adamerging.py +1 -1
- fusion_bench/method/{DOGE_TA/DOGE_TA.py → doge_ta/doge_ta.py} +1 -1
- fusion_bench/method/fisher_merging/fisher_merging.py +29 -17
- fusion_bench/method/gossip/__init__.py +3 -0
- fusion_bench/method/gossip/clip_layer_wise_gossip.py +43 -0
- fusion_bench/method/gossip/clip_task_wise_gossip.py +190 -0
- fusion_bench/method/gossip/entropy_loss.py +25 -0
- fusion_bench/method/gossip/flan_t5_layer_wise_gossip.py +388 -0
- fusion_bench/method/gossip/layer_wise_gossip.py +434 -0
- fusion_bench/method/gossip/min_norm_solvers.py +227 -0
- fusion_bench/method/gossip/task_wise_gossip.py +265 -0
- fusion_bench/method/gossip/utils.py +74 -0
- fusion_bench/method/isotropic_merging/__init__.py +1 -1
- fusion_bench/method/opcm/opcm.py +102 -84
- fusion_bench/method/opcm/task_arithmetic.py +35 -21
- fusion_bench/method/opcm/ties_merging.py +71 -52
- fusion_bench/method/pwe_moe/module.py +1 -1
- fusion_bench/method/pwe_moe/openclip_pwe_moe.py +476 -0
- fusion_bench/method/regmean/regmean.py +25 -17
- fusion_bench/method/smile_upscaling/__init__.py +1 -1
- fusion_bench/method/smile_upscaling/smile_upscaling.py +13 -10
- fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py +7 -0
- fusion_bench/method/task_arithmetic/task_arithmetic.py +8 -6
- fusion_bench/method/ties_merging/ties_merging.py +36 -31
- fusion_bench/method/we_moe/we_moe.py +14 -15
- fusion_bench/mixins/__init__.py +6 -3
- fusion_bench/mixins/hydra_config.py +49 -0
- fusion_bench/mixins/openclip_classification.py +11 -0
- fusion_bench/mixins/simple_profiler.py +4 -2
- fusion_bench/modelpool/__init__.py +3 -1
- fusion_bench/modelpool/base_pool.py +2 -2
- fusion_bench/modelpool/openclip_vision/__init__.py +1 -0
- fusion_bench/modelpool/openclip_vision/modelpool.py +255 -0
- fusion_bench/models/open_clip/__init__.py +6 -0
- fusion_bench/models/open_clip/modeling.py +176 -0
- fusion_bench/models/open_clip/utils.py +311 -0
- fusion_bench/models/open_clip/variables_and_paths.py +56 -0
- fusion_bench/models/parameter_dict.py +54 -13
- fusion_bench/models/wrappers/layer_wise_fusion.py +1 -46
- fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py +4 -119
- fusion_bench/scripts/nyuv2_mtl_train.py +1 -1
- fusion_bench/taskpool/__init__.py +5 -3
- fusion_bench/taskpool/clip_vision/__init__.py +1 -0
- fusion_bench/taskpool/clip_vision/clip_rankone_moe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/clip_smile_taskpool.py +102 -0
- fusion_bench/taskpool/clip_vision/clip_sparse_wemoe_taskpool.py +2 -30
- fusion_bench/taskpool/clip_vision/taskpool.py +1 -2
- fusion_bench/taskpool/clip_vision/utils/__init__.py +0 -0
- fusion_bench/taskpool/clip_vision/utils/routing_analysis_utils.py +65 -0
- fusion_bench/taskpool/gpt2_text_classification.py +30 -1
- fusion_bench/taskpool/openclip_vision/__init__.py +1 -0
- fusion_bench/taskpool/openclip_vision/openclip_taskpool.py +196 -0
- fusion_bench/utils/data.py +12 -0
- fusion_bench/utils/devices.py +14 -0
- fusion_bench/utils/instantiate.py +12 -0
- fusion_bench/utils/misc.py +9 -2
- fusion_bench/utils/packages.py +14 -0
- fusion_bench/utils/parameters.py +1 -1
- fusion_bench/utils/tensorboard.py +1 -1
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/METADATA +15 -2
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/RECORD +198 -158
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/WHEEL +1 -1
- fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml +1 -2
- fusion_bench_config/dataset/image_classification/test/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/emnist_letters.yaml +0 -1
- fusion_bench_config/dataset/image_classification/test/fashion_mnist.yaml +1 -1
- fusion_bench_config/dataset/image_classification/train/TALL20.yaml +0 -1
- fusion_bench_config/dataset/image_classification/train/fashion_mnist.yaml +1 -1
- fusion_bench_config/fabric/auto.yaml +0 -1
- fusion_bench_config/fabric/llama_ddp.yaml +0 -1
- fusion_bench_config/fabric/llama_fsdp.yaml +0 -1
- fusion_bench_config/fabric/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric/strategy/deepspeed.yaml +0 -1
- fusion_bench_config/fabric/strategy/llama_peft_fsdp.yaml +0 -1
- fusion_bench_config/fabric_model_fusion.yaml +0 -1
- fusion_bench_config/llama_full_finetune.yaml +0 -2
- fusion_bench_config/llama_model_fusion.yaml +0 -2
- fusion_bench_config/method/ada_svd/clip_vision.yaml +0 -1
- fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml +0 -5
- fusion_bench_config/method/adamerging/layer_wise_gpt2.yaml +0 -5
- fusion_bench_config/method/adamerging/llama_sft.yaml +0 -2
- fusion_bench_config/method/adamerging.yaml +2 -2
- fusion_bench_config/method/analysis/task_vector_cos_similarity.yaml +0 -1
- fusion_bench_config/method/analysis/task_vector_violin_plot.yaml +0 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml +0 -1
- fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml +1 -12
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml +1 -10
- fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml +1 -14
- fusion_bench_config/method/dare/simple_average.yaml +0 -1
- fusion_bench_config/method/dare/task_arithmetic.yaml +0 -1
- fusion_bench_config/method/dare/ties_merging.yaml +0 -2
- fusion_bench_config/method/dawe/dawe_for_clip.yaml +0 -3
- fusion_bench_config/method/{DOGE_TA/DOGE_TA.yaml → doge_ta/doge_ta.yaml} +1 -1
- fusion_bench_config/method/ensemble/max_model_predictor.yaml +1 -1
- fusion_bench_config/method/ensemble/simple_ensemble.yaml +0 -1
- fusion_bench_config/method/ensemble/weighted_ensemble.yaml +0 -1
- fusion_bench_config/method/gossip/layer_wise_clip.yaml +30 -0
- fusion_bench_config/method/gossip/layer_wise_flan_t5.yaml +25 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +0 -1
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +0 -1
- fusion_bench_config/method/linear/linear_interpolation.yaml +0 -1
- fusion_bench_config/method/linear/llama_expo.yaml +0 -3
- fusion_bench_config/method/linear/llama_expo_with_dare.yaml +0 -5
- fusion_bench_config/method/linear/weighted_average.yaml +0 -1
- fusion_bench_config/method/linear/weighted_average_for_llama.yaml +0 -1
- fusion_bench_config/method/lm_finetune/bradley_terry_rm.yaml +0 -4
- fusion_bench_config/method/lm_finetune/fullfinetune_sft.yaml +0 -4
- fusion_bench_config/method/lm_finetune/peftfinetune_sft.yaml +0 -6
- fusion_bench_config/method/mixtral_moe_upscaling.yaml +1 -2
- fusion_bench_config/method/model_recombination.yaml +0 -1
- fusion_bench_config/method/opcm/opcm.yaml +0 -1
- fusion_bench_config/method/opcm/task_arithmetic.yaml +0 -2
- fusion_bench_config/method/opcm/ties_merging.yaml +0 -2
- fusion_bench_config/method/opcm/weight_average.yaml +0 -1
- fusion_bench_config/method/pwe_moe/epo_for_openclip.yaml +30 -0
- fusion_bench_config/method/pwe_moe/ls_for_openclip.yaml +30 -0
- fusion_bench_config/method/{pwe_moe_ls_for_clip.yaml → pwe_moe/pwe_moe_ls_for_clip.yaml} +7 -6
- fusion_bench_config/method/rankone_moe/rankone_moe.yaml +1 -3
- fusion_bench_config/method/regmean/gpt2_regmean.yaml +0 -1
- fusion_bench_config/method/slerp/slerp.yaml +0 -2
- fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml +1 -1
- fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml +1 -1
- fusion_bench_config/method/surgery/adamerging_surgery.yaml +1 -2
- fusion_bench_config/method/task_arithmetic.yaml +1 -1
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +0 -1
- fusion_bench_config/method/ties_merging.yaml +1 -1
- fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml +0 -1
- fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml +0 -8
- fusion_bench_config/model/clip-vit/clip-vit-base-patch16_cifar10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford-iiit-pet.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_oxford_flowers102.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_pcam.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_rendered-sst2.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_resisc45.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stanford-cars.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_stl10.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_sun397.yaml +1 -1
- fusion_bench_config/model/clip-vit/clip-vit-large-patch14_svhn.yaml +1 -1
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TA8_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_individual_lora.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +0 -3
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +0 -4
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +0 -3
- fusion_bench_config/modelpool/CausalLMPool/llama_alpaca_cleaned.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_codealpaca.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_for_causallm.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/llama_metamathqa.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/llama_ultrachat.yaml +0 -4
- fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +0 -1
- fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml +0 -3
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/README.md +90 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-16_TA8.yaml +27 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8.yaml +45 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_cars_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_cars.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA_sun397_dtd.yaml +23 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-B-32_individual.yaml +7 -0
- fusion_bench_config/modelpool/OpenCLIPVisionModelPool/ViT-L-14_TA8.yaml +26 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml +0 -2
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml +8 -10
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml +66 -0
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml +0 -1
- fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml +0 -3
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml +0 -4
- fusion_bench_config/modelpool/SeqenceClassificationModelPool/single_reward_model.yaml +0 -3
- fusion_bench_config/modelpool/gpt-2_glue.yaml +0 -3
- fusion_bench_config/nyuv2_config.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/_template.yaml +0 -3
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-classification_TA8_B16.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml +0 -2
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-16_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-B-32_TA8.yaml +24 -0
- fusion_bench_config/taskpool/OpenCLIPVisionModelTaskPool/ViT-L-14_TA8.yaml +24 -0
- fusion_bench_config/taskpool/gpt-2_glue.yaml +0 -1
- fusion_bench_config/taskpool/reward_model_evaluation.yaml +0 -4
- fusion_bench/method/DOGE_TA/__init__.py +0 -2
- /fusion_bench/method/{DOGE_TA → doge_ta}/layer_wise_adamerging.py +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info/licenses}/LICENSE +0 -0
- {fusion_bench-0.2.11.dist-info → fusion_bench-0.2.13.dist-info}/top_level.txt +0 -0
fusion_bench/method/regmean/regmean.py
CHANGED

@@ -13,6 +13,7 @@ from torch import Tensor, nn
 from tqdm.autonotebook import tqdm
 
 from fusion_bench.method import BaseAlgorithm
+from fusion_bench.mixins import SimpleProfilerMixin
 from fusion_bench.modelpool import BaseModelPool
 
 log = logging.getLogger(__name__)
@@ -279,7 +280,7 @@ def regmean_merging(
     return merged_params
 
 
-class RegMeanAlgorithm(BaseAlgorithm):
+class RegMeanAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
     _include_module_type = [nn.Linear]
     _config_mapping = {
         "num_regmean_examples": "num_regmean_examples",
@@ -342,24 +343,31 @@ class RegMeanAlgorithm(BaseAlgorithm):
             )
             assert len(linear_modules_to_merge) > 0, "No linear modules to merge"
 
-            regmean_weights = self.get_regmean_weights(
-                name,
-                model,
-                train_dataset=modelpool.load_train_dataset(name),
-                linear_modules_to_merge=linear_modules_to_merge,
-            )
-            models_to_merge_regmean_weights_list.append(regmean_weights)
+            with (
+                self.profile("merging models"),
+                self.profile("computing regmean weights"),
+            ):
+                regmean_weights = self.get_regmean_weights(
+                    name,
+                    model,
+                    train_dataset=modelpool.load_train_dataset(name),
+                    linear_modules_to_merge=linear_modules_to_merge,
+                )
+                models_to_merge_regmean_weights_list.append(regmean_weights)
+
+        with self.profile("merging models"):
+            # merging with regmean weights
+            merged_params = merging_with_regmean_weights(
+                models_to_merge_param_dict=models_to_merge_param_dict,
+                models_to_merge_regmean_weights_list=models_to_merge_regmean_weights_list,
+                reduce_non_diagonal_ratio=self.reduce_non_diagonal_ratio,
+                weight_transpose=self.config.get("weight_transpose", True),
+            )
 
-        # merging with regmean weights
-        merged_params = merging_with_regmean_weights(
-            models_to_merge_param_dict=models_to_merge_param_dict,
-            models_to_merge_regmean_weights_list=models_to_merge_regmean_weights_list,
-            reduce_non_diagonal_ratio=self.reduce_non_diagonal_ratio,
-            weight_transpose=self.config.get("weight_transpose", True),
-        )
+        merged_model = modelpool.load_model("_pretrained_")
+        merged_model.load_state_dict(merged_params, strict=False)
 
-        merged_model = modelpool.load_model("_pretrained_")
-        merged_model.load_state_dict(merged_params, strict=False)
+        self.print_profile_summary()
         return merged_model
 
     def on_regmean_start(self):
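Note: RegMean merges each linear layer in closed form from per-model Gram matrices of the layer inputs, and the `reduce_non_diagonal_ratio` passed above shrinks the off-diagonal Gram entries before inversion. A minimal standalone sketch of that update, not the fusion_bench implementation (names and shapes are illustrative):

    import torch

    def regmean_linear(weights, grams, reduce_non_diagonal_ratio=0.9):
        """Merge linear weights W_i (out x in) given input Gram matrices G_i = X_i^T X_i."""
        def shrink(g):
            # keep the diagonal, scale off-diagonal entries by the ratio
            diag = torch.diag(torch.diagonal(g))
            return reduce_non_diagonal_ratio * g + (1 - reduce_non_diagonal_ratio) * diag

        grams = [shrink(g) for g in grams]
        # W* = (sum_i G_i)^{-1} (sum_i G_i W_i^T), transposed back to (out x in)
        lhs = sum(grams)
        rhs = sum(g @ w.T for g, w in zip(grams, weights))
        return torch.linalg.solve(lhs, rhs).T

    # toy usage: merge two 4->3 linear layers from random activation statistics
    ws = [torch.randn(3, 4) for _ in range(2)]
    xs = [torch.randn(16, 4) for _ in range(2)]
    gs = [x.T @ x for x in xs]
    print(regmean_linear(ws, gs).shape)  # torch.Size([3, 4])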
fusion_bench/method/smile_upscaling/smile_upscaling.py
CHANGED

@@ -442,16 +442,19 @@ class SmileUpscalingAlgorithm(
             print_parameters(model)
             return model
 
-        with self.profile("load pretrained model"):
-            pretrained_model = modelpool.load_model("_pretrained_")
-        with self.profile("load fine-tuned model"):
-            finetuned_models = [
-                m for m in tqdm(modelpool.models(), total=len(modelpool.model_names))
-            ]
-
-        if self.config.device == "cuda" and torch.cuda.is_available():
-            pretrained_model = pretrained_model.cuda()
-            finetuned_models = [m.cuda() for m in finetuned_models]
+        with self.profile("loading model"):
+            # load models and move to GPU if available
+            with self.profile("load pretrained model"):
+                pretrained_model = modelpool.load_model("_pretrained_")
+            with self.profile("load fine-tuned model"):
+                finetuned_models = [
+                    m
+                    for m in tqdm(modelpool.models(), total=len(modelpool.model_names))
+                ]
+
+            if self.config.device == "cuda" and torch.cuda.is_available():
+                pretrained_model = pretrained_model.cuda()
+                finetuned_models = [m.cuda() for m in finetuned_models]
 
         with self.profile("merge model"):
             model = self.merge(pretrained_model, finetuned_models)
fusion_bench/method/surgery/clip_layer_wise_adamerging_surgery.py
CHANGED

@@ -85,7 +85,14 @@ class CLIPLayerWiseAdaMergingSurgeryAlgorithm(
 
         if self.config.weights is not None:
             # skip the test-time adaptation
+            merge_weight: torch.Tensor = torch.load(self.config.weights)
+            module.merge_weight.data = merge_weight.to(
+                device=module.merge_weight.device
+            )
             merged_model = copy.deepcopy(module.merge_and_unload())
+            # setup the zero-shot classification head
+            self.on_test_time_adaptation_start()
+
         else:
             with self.profile("test-time adaptation"):
                 module = self.test_time_adaptation(module)
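Note: the new branch restores previously learned merging coefficients from disk so test-time adaptation can be skipped on repeat runs. A hedged sketch of the round trip, with a stand-in module (the real wrapper exposes `merge_weight` as in the diff above; the save path is illustrative):

    import torch
    from torch import nn

    class TinyWrapper(nn.Module):
        # stand-in for the layer-wise merging wrapper: one coefficient per (layer, model)
        def __init__(self, num_layers=12, num_models=8):
            super().__init__()
            self.merge_weight = nn.Parameter(torch.full((num_layers, num_models), 0.3))

    module = TinyWrapper()
    torch.save(module.merge_weight.data, "merge_weight.pt")  # persist after adaptation
    restored = torch.load("merge_weight.pt")                 # later run: the `weights` config points here
    module.merge_weight.data = restored.to(module.merge_weight.device)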
fusion_bench/method/task_arithmetic/task_arithmetic.py
CHANGED

@@ -6,7 +6,7 @@ http://arxiv.org/abs/2212.04089
 
 import logging
 from copy import deepcopy
-from typing import Dict, List, Mapping, TypeVar, Union  # noqa: F401
+from typing import Dict, List, Mapping, Optional, TypeVar, Union  # noqa: F401
 
 import torch
 from torch import nn
@@ -19,18 +19,18 @@ from fusion_bench.utils.state_dict_arithmetic import (
     state_dict_mul,
     state_dict_sub,
 )
-from fusion_bench.utils.type import StateDictType
+from fusion_bench.utils.type import StateDictType, TorchModelType
 
 log = logging.getLogger(__name__)
 
 
 @torch.no_grad()
 def task_arithmetic_merge(
-    pretrained_model: nn.Module,
-    finetuned_models: List[nn.Module],
+    pretrained_model: TorchModelType,
+    finetuned_models: List[TorchModelType],
     scaling_factor: float,
     inplace: bool = True,
-) -> nn.Module:
+) -> TorchModelType:
     """
     Merges the task vectors from multiple fine-tuned models into a single pre-trained model.
 
@@ -46,15 +46,17 @@ def task_arithmetic_merge(
     """
     if not inplace:
         pretrained_model = deepcopy(pretrained_model)
-    task_vector: StateDictType = None
+    task_vector: Optional[StateDictType] = None
     # Calculate the total task vector
     for model in finetuned_models:
         if task_vector is None:
+            # calculate the task vector for the first model
             task_vector = state_dict_sub(
                 model.state_dict(keep_vars=True),
                 pretrained_model.state_dict(keep_vars=True),
             )
         else:
+            # calculate the task vector for the remaining models
             task_vector = state_dict_add(
                 task_vector,
                 state_dict_sub(
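Note: the retyped signature is now generic over the model class via `TorchModelType`. The merge itself is the usual task arithmetic, theta_merged = theta_pre + lambda * sum_i (theta_i - theta_pre); a hedged usage sketch with toy modules (real callers pass fine-tuned checkpoints):

    import torch
    from torch import nn
    from fusion_bench.method.task_arithmetic.task_arithmetic import task_arithmetic_merge

    pretrained = nn.Linear(4, 4)
    finetuned = [nn.Linear(4, 4) for _ in range(3)]  # stand-ins for fine-tuned models

    merged = task_arithmetic_merge(
        pretrained_model=pretrained,
        finetuned_models=finetuned,
        scaling_factor=0.3,
        inplace=False,  # keep the original pretrained weights intact
    )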
fusion_bench/method/ties_merging/ties_merging.py
CHANGED

@@ -16,6 +16,7 @@ from torch import Tensor, nn
 
 from fusion_bench.compat.modelpool import to_modelpool
 from fusion_bench.method import BaseAlgorithm
+from fusion_bench.mixins import SimpleProfilerMixin
 from fusion_bench.modelpool import BaseModelPool
 from fusion_bench.utils.type import StateDictType
 
@@ -24,7 +25,7 @@ from .ties_merging_utils import state_dict_to_vector, ties_merging, vector_to_state_dict
 log = logging.getLogger(__name__)
 
 
-class TiesMergingAlgorithm(BaseAlgorithm):
+class TiesMergingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
     """
     TiesMergingAlgorithm is a class for fusing multiple models using the TIES merging technique.
 
@@ -84,34 +85,38 @@ class TiesMergingAlgorithm(BaseAlgorithm):
         scaling_factor = self.scaling_factor
         threshold = self.threshold
 
-        # Load the pretrained model
-        pretrained_model = modelpool.load_model("_pretrained_")
-
-        # Load the state dicts of the models
-        ft_checks: List[StateDictType] = [
-            modelpool.load_model(model_name).state_dict(keep_vars=True)
-            for model_name in modelpool.model_names
-        ]
-        ptm_check: StateDictType = pretrained_model.state_dict(keep_vars=True)
-
-        # Compute the task vectors
-        flat_ft: Tensor = torch.vstack(
-            [state_dict_to_vector(check, remove_keys) for check in ft_checks]
-        )
-        flat_ptm: Tensor = state_dict_to_vector(ptm_check, remove_keys)
-        tv_flat_checks = flat_ft - flat_ptm
-
-        # Perform TIES Merging
-        merged_tv = ties_merging(
-            tv_flat_checks,
-            reset_thresh=threshold,
-            merge_func=merge_func,
-        )
-        merged_check = flat_ptm + scaling_factor * merged_tv
-        merged_state_dict = vector_to_state_dict(
-            merged_check, ptm_check, remove_keys=remove_keys
-        )
-
-        # Load the merged state dict into the pretrained model
-        pretrained_model.load_state_dict(merged_state_dict)
+        with self.profile("loading models"):
+            # Load the pretrained model
+            pretrained_model = modelpool.load_model("_pretrained_")
+
+            # Load the state dicts of the models
+            ft_checks: List[StateDictType] = [
+                modelpool.load_model(model_name).state_dict(keep_vars=True)
+                for model_name in modelpool.model_names
+            ]
+            ptm_check: StateDictType = pretrained_model.state_dict(keep_vars=True)
+
+        with self.profile("merging models"):
+            # Compute the task vectors
+            flat_ft: Tensor = torch.vstack(
+                [state_dict_to_vector(check, remove_keys) for check in ft_checks]
+            )
+            flat_ptm: Tensor = state_dict_to_vector(ptm_check, remove_keys)
+            tv_flat_checks = flat_ft - flat_ptm
+
+            # Perform TIES Merging
+            merged_tv = ties_merging(
+                tv_flat_checks,
+                reset_thresh=threshold,
+                merge_func=merge_func,
+            )
+            merged_check = flat_ptm + scaling_factor * merged_tv
+            merged_state_dict = vector_to_state_dict(
+                merged_check, ptm_check, remove_keys=remove_keys
+            )
+
+            # Load the merged state dict into the pretrained model
+            pretrained_model.load_state_dict(merged_state_dict)
+
+        self.print_profile_summary()
         return pretrained_model
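Note: `ties_merging` operates on the stacked flat task vectors produced above and implements the TIES trim / elect-sign / disjoint-mean steps. For reference, a compact re-derivation of that core, independent of the fusion_bench utilities (`reset_thresh` corresponds to the top-k% trim):

    import torch

    def ties_core(task_vectors: torch.Tensor, k: float = 20.0) -> torch.Tensor:
        # task_vectors: (num_models, num_params) -> merged flat vector (num_params,)
        n = task_vectors.shape[1]
        keep = max(1, int(n * k / 100))

        # 1) trim: keep only the top-k% largest-magnitude entries per model
        magnitudes = task_vectors.abs()
        thresholds = magnitudes.kthvalue(n - keep + 1, dim=1, keepdim=True).values
        trimmed = torch.where(magnitudes >= thresholds, task_vectors, torch.zeros_like(task_vectors))

        # 2) elect: majority sign per parameter, weighted by magnitude
        elected_sign = torch.sign(trimmed.sum(dim=0))

        # 3) disjoint mean: average only the entries that agree with the elected sign
        agrees = torch.sign(trimmed) == elected_sign
        counts = agrees.sum(dim=0).clamp(min=1)
        return (trimmed * agrees).sum(dim=0) / counts

    merged_tv = ties_core(torch.randn(4, 1000), k=20.0)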
fusion_bench/method/we_moe/we_moe.py
CHANGED

@@ -5,7 +5,6 @@ from typing import cast  # noqa: F401
 import lightning as L
 import lightning.fabric.wrappers
 import torch
-from lightning.pytorch.profilers import SimpleProfiler
 from omegaconf import DictConfig
 from torch import Tensor
 from torch.utils.data import DataLoader
@@ -13,6 +12,7 @@ from tqdm.autonotebook import tqdm
 
 from fusion_bench.compat.method.base_algorithm import ModelFusionAlgorithm
 from fusion_bench.compat.modelpool import ModelPool
+from fusion_bench.mixins import SimpleProfilerMixin
 from fusion_bench.models.we_moe import WeightEnsemblingMoE
 from fusion_bench.utils import timeit_context
 from fusion_bench.utils.parameters import print_parameters
@@ -34,7 +34,10 @@ def entropy_loss(logits: Tensor) -> Tensor:
     return -torch.sum(probs * torch.log(probs + 1e-8), dim=-1).mean()
 
 
-class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
+class WeightEnsemblingMoEAlgorithm(
+    ModelFusionAlgorithm,
+    SimpleProfilerMixin,
+):
     """
     Algorithm for fusing models using Weight Ensembling Mixture of Experts (MoE).
 
@@ -44,7 +47,6 @@ class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
     Attributes:
         _fabric (L.Fabric): The fabric for distributed training.
         modelpool (ModelPool): The pool of models to be fused.
-        profiler (SimpleProfiler): The profiler for measuring performance.
     """
 
     _fabric: L.Fabric = None
@@ -66,9 +68,6 @@ class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
             self._fabric.launch()
         else:
             assert "No CUDA device available."
-        self.profiler = SimpleProfiler(
-            self.config.get("cache_dir", "outputs"), "we_moe_profiler.txt"
-        )
 
     @abstractmethod
     def load_checkpoint(self, model, checkpoint):
@@ -177,9 +176,9 @@ class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
         for step_idx in pbar:
             if self.config.use_grad_accumulate:
                 for task in self.modelpool.model_names:
-                    with self.profiler.profile("data time"):
+                    with self.profile("data time"):
                         batch = next(self.get_shuffled_test_loader_iter(task))
-                    with self.profiler.profile("forward pass"):
+                    with self.profile("forward pass"):
                         logits = self.compute_logits(module, batch, task)
                         assert (
                             logits.dim() == 2
@@ -187,23 +186,23 @@ class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
                         loss = entropy_loss(logits)
                     # .backward() accumulates when .zero_grad() wasn't called
                     # this can save memory
-                    with self.profiler.profile("backward pass"):
+                    with self.profile("backward pass"):
                         self._fabric.backward(loss, retain_graph=True)
             else:
                 loss = 0
                 for task in self.modelpool.model_names:
-                    with self.profiler.profile("data time"):
+                    with self.profile("data time"):
                         batch = next(self.get_shuffled_test_loader_iter(task))
-                    with self.profiler.profile("forward pass"):
+                    with self.profile("forward pass"):
                         logits = self.compute_logits(module, batch, task)
                         assert (
                             logits.dim() == 2
                         ), f"Expected logits to be 2D, got {logits.dim()}"
                         loss = loss + entropy_loss(logits)
-                with self.profiler.profile("backward pass"):
+                with self.profile("backward pass"):
                     self._fabric.backward(loss, retain_graph=True)
 
-            with self.profiler.profile("optimizer step"):
+            with self.profile("optimizer step"):
                 optimizer.step()
                 optimizer.zero_grad()
 
@@ -232,7 +231,7 @@ class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
             )
             self.load_checkpoint(moe_model, self.config.checkpoint)
         else:
-            with self.profiler.profile("test-time adaptation"):
+            with self.profile("test-time adaptation"):
                 moe_model = self.test_time_adaptation(moe_model)
         if self.config.get("save_checkpoint", False):
             log.info(f"save checkpoint to {self.config.save_checkpoint}")
@@ -243,5 +242,5 @@ class WeightEnsemblingMoEAlgorithm(ModelFusionAlgorithm):
 
         # enable sample-wise adaptation
         moe_model.batch_reduce = False
-
+        self.print_profile_summary()
         return moe_model
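Note: the adaptation objective here is entropy minimization over unlabeled test batches, using the `entropy_loss` shown above. A self-contained sketch of one adaptation step under that objective (the module and optimizer are illustrative stand-ins):

    import torch
    from torch import Tensor, nn

    def entropy_loss(logits: Tensor) -> Tensor:
        probs = torch.softmax(logits, dim=-1)
        return -torch.sum(probs * torch.log(probs + 1e-8), dim=-1).mean()

    model = nn.Linear(16, 10)  # stand-in for the MoE gate being adapted
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    batch = torch.randn(32, 16)        # unlabeled test batch
    loss = entropy_loss(model(batch))  # sharpen predictions on test data
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()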
fusion_bench/mixins/__init__.py
CHANGED

@@ -6,20 +6,23 @@ from typing_extensions import TYPE_CHECKING
 from fusion_bench.utils.lazy_imports import LazyImporter
 
 _import_structure = {
+    "clip_classification": ["CLIPClassificationMixin"],
+    "fabric_training": ["FabricTrainingMixin"],
+    "hydra_config": ["HydraConfigMixin"],
     "lightning_fabric": ["LightningFabricMixin"],
+    "openclip_classification": ["OpenCLIPClassificationMixin"],
     "serialization": ["YAMLSerializationMixin", "BaseYAMLSerializableModel"],
     "simple_profiler": ["SimpleProfilerMixin"],
-    "clip_classification": ["CLIPClassificationMixin"],
-    "fabric_training": ["FabricTrainingMixin"],
 }
 
 if TYPE_CHECKING:
     from .clip_classification import CLIPClassificationMixin
     from .fabric_training import FabricTrainingMixin
+    from .hydra_config import HydraConfigMixin
     from .lightning_fabric import LightningFabricMixin
+    from .openclip_classification import OpenCLIPClassificationMixin
     from .serialization import BaseYAMLSerializableModel, YAMLSerializationMixin
     from .simple_profiler import SimpleProfilerMixin
-
 else:
     sys.modules[__name__] = LazyImporter(
         __name__,
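Note: `_import_structure` maps submodules to their public names so `LazyImporter` can defer the actual imports until first attribute access. A generic sketch of the same idea via module-level `__getattr__` (PEP 562) — not fusion_bench's actual `LazyImporter`:

    # mypkg/__init__.py — analogous lazy-import pattern
    import importlib

    _import_structure = {
        "simple_profiler": ["SimpleProfilerMixin"],
        "hydra_config": ["HydraConfigMixin"],
    }
    _attr_to_module = {
        attr: mod for mod, attrs in _import_structure.items() for attr in attrs
    }

    def __getattr__(name):
        if name in _attr_to_module:
            module = importlib.import_module(f".{_attr_to_module[name]}", __name__)
            return getattr(module, name)
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")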
fusion_bench/mixins/hydra_config.py
ADDED

@@ -0,0 +1,49 @@
+import logging
+import os
+from copy import deepcopy
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+import hydra.core.global_hydra
+from hydra import compose, initialize
+from omegaconf import DictConfig, OmegaConf
+
+from fusion_bench.utils import import_object, instantiate
+from fusion_bench.utils.instantiate import set_print_function_call
+
+log = logging.getLogger(__name__)
+
+
+class HydraConfigMixin:
+    """
+    A mixin for classes that need to be instantiated from a config file.
+    """
+
+    @classmethod
+    def from_config(
+        cls,
+        config_name: Union[str, Path],
+        overrides: Optional[List[str]] = None,
+    ):
+        if not hydra.core.global_hydra.GlobalHydra.instance().is_initialized():
+            raise RuntimeError("Hydra is not initialized.")
+        else:
+            cfg = compose(config_name=config_name, overrides=overrides)
+
+        config_groups = config_name.split("/")[:-1]
+        for config_group in config_groups:
+            cfg = cfg[config_group]
+
+        if "_target_" in cfg:
+            # if the config has a _target_ key, check if it is equal to the class name
+            target_cls = import_object(cfg["_target_"])
+            if target_cls != cls:
+                log.warning(
+                    f"The _target_ key in the config is {cfg['_target_']}, but the class name is {cls.__name__}."
+                )
+            with set_print_function_call(False):
+                obj = instantiate(cfg)
+        else:
+            obj = cls(**cfg)
+
+        return obj
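Note: `from_config` requires Hydra to be initialized by the caller; it composes the named config, drills into its config groups, and instantiates either via `_target_` or the class itself. A hedged usage sketch (the `config_path` is an assumption; the config name exists in this release's config tree, and `BaseModelPool` gains `from_config` via this mixin, see below):

    from hydra import initialize

    from fusion_bench.modelpool import BaseModelPool

    with initialize(config_path="fusion_bench_config", version_base=None):
        modelpool = BaseModelPool.from_config(
            "modelpool/OpenCLIPVisionModelPool/ViT-B-32_TA8"
        )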
fusion_bench/mixins/openclip_classification.py
ADDED

@@ -0,0 +1,11 @@
+import logging
+
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.models.open_clip import ImageClassifier, ImageEncoder
+
+log = logging.getLogger(__name__)
+
+
+class OpenCLIPClassificationMixin(LightningFabricMixin):
+    _train_processor = None
+    _test_processor = None
fusion_bench/mixins/simple_profiler.py
CHANGED

@@ -1,5 +1,5 @@
 from contextlib import contextmanager
-from typing import Generator
+from typing import Generator, Optional
 
 from lightning.fabric.utilities.rank_zero import rank_zero_only
 from lightning.pytorch.profilers import SimpleProfiler
@@ -70,7 +70,9 @@ class SimpleProfilerMixin:
         self.profiler.stop(action_name)
 
     @rank_zero_only
-    def print_profile_summary(self):
+    def print_profile_summary(self, title: Optional[str] = None):
+        if title is not None:
+            print(title)
         print(self.profiler.summary())
 
     def __del__(self):
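Note: the optional `title` distinguishes multiple summaries in one run. The mixin pattern adopted by the algorithms above looks roughly like:

    from fusion_bench.mixins import SimpleProfilerMixin

    class MyAlgorithm(SimpleProfilerMixin):
        def run(self, modelpool):
            with self.profile("loading models"):
                ...  # load checkpoints here
            with self.profile("merging models"):
                ...  # merge here
            self.print_profile_summary(title="MyAlgorithm profile")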
fusion_bench/modelpool/__init__.py
CHANGED

@@ -6,12 +6,13 @@ from fusion_bench.utils.lazy_imports import LazyImporter
 
 _import_structure = {
     "base_pool": ["BaseModelPool"],
+    "causal_lm": ["CausalLMPool", "CausalLMBackbonePool"],
     "clip_vision": ["CLIPVisionModelPool"],
     "nyuv2_modelpool": ["NYUv2ModelPool"],
     "huggingface_automodel": ["AutoModelPool"],
-    "causal_lm": ["CausalLMPool", "CausalLMBackbonePool"],
     "seq2seq_lm": ["Seq2SeqLMPool"],
     "PeftModelForSeq2SeqLM": ["PeftModelForSeq2SeqLMPool"],
+    "openclip_vision": ["OpenCLIPVisionModelPool"],
     "huggingface_gpt2_classification": [
         "HuggingFaceGPT2ClassificationPool",
         "GPT2ForSequenceClassificationPool",
@@ -30,6 +31,7 @@ if TYPE_CHECKING:
         HuggingFaceGPT2ClassificationPool,
     )
     from .nyuv2_modelpool import NYUv2ModelPool
+    from .openclip_vision import OpenCLIPVisionModelPool
     from .PeftModelForSeq2SeqLM import PeftModelForSeq2SeqLMPool
     from .seq2seq_lm import Seq2SeqLMPool
     from .seq_classification_lm import SeqenceClassificationModelPool
fusion_bench/modelpool/base_pool.py
CHANGED

@@ -7,7 +7,7 @@ from omegaconf import DictConfig
 from torch import nn
 from torch.utils.data import Dataset
 
-from fusion_bench.mixins import BaseYAMLSerializableModel
+from fusion_bench.mixins import BaseYAMLSerializableModel, HydraConfigMixin
 from fusion_bench.utils import instantiate, timeit_context
 
 __all__ = ["BaseModelPool"]
@@ -15,7 +15,7 @@ __all__ = ["BaseModelPool"]
 log = logging.getLogger(__name__)
 
 
-class BaseModelPool(BaseYAMLSerializableModel):
+class BaseModelPool(BaseYAMLSerializableModel, HydraConfigMixin):
     """
     A class for managing and interacting with a pool of models along with their associated datasets or other specifications. For example, a model pool may contain multiple models, each with its own training, validation, and testing datasets. As for the specifications, a vision model pool may contain image preprocessor, and a language model pool may contain a tokenizer.
 
fusion_bench/modelpool/openclip_vision/__init__.py
ADDED

@@ -0,0 +1 @@
+from .modelpool import OpenCLIPVisionModelPool