fusion-bench 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fusion_bench/__main__.py +4 -0
- fusion_bench/dataset/fer2013.py +1 -0
- fusion_bench/method/__init__.py +26 -4
- fusion_bench/method/classification/__init__.py +1 -0
- fusion_bench/method/classification/clip_finetune.py +1 -3
- fusion_bench/method/classification/continual_clip_finetune.py +297 -0
- fusion_bench/method/dare/__init__.py +1 -0
- fusion_bench/method/dare/task_arithmetic.py +14 -7
- fusion_bench/method/dare/ties_merging.py +100 -0
- fusion_bench/method/isotropic_merging/__init__.py +15 -0
- fusion_bench/method/isotropic_merging/iso.py +114 -0
- fusion_bench/method/isotropic_merging/iso_utils.py +176 -0
- fusion_bench/method/opcm/__init__.py +4 -0
- fusion_bench/method/opcm/opcm.py +277 -0
- fusion_bench/method/opcm/task_arithmetic.py +115 -0
- fusion_bench/method/opcm/ties_merging.py +156 -0
- fusion_bench/method/opcm/utils.py +73 -0
- fusion_bench/method/opcm/weight_average.py +120 -0
- fusion_bench/method/slerp/slerp.py +1 -1
- fusion_bench/method/task_singular_vector/TSVM.py +22 -2
- fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +91 -93
- fusion_bench/method/ties_merging/ties_merging.py +10 -0
- fusion_bench/metrics/continual_learning/backward_transfer.py +22 -0
- fusion_bench/mixins/clip_classification.py +4 -1
- fusion_bench/programs/fabric_fusion_program.py +22 -11
- fusion_bench/scripts/cli.py +1 -0
- fusion_bench/taskpool/base_pool.py +1 -1
- fusion_bench/taskpool/clip_vision/taskpool.py +12 -7
- fusion_bench/utils/__init__.py +2 -1
- fusion_bench/utils/dict.py +43 -0
- fusion_bench/utils/expr.py +90 -0
- fusion_bench/utils/fabric.py +17 -0
- fusion_bench/utils/instantiate.py +7 -1
- fusion_bench/utils/json.py +30 -0
- fusion_bench/utils/parameters.py +27 -7
- fusion_bench/utils/path.py +15 -0
- fusion_bench/utils/plot/color_data.py +1726 -0
- fusion_bench/utils/rich_utils.py +15 -0
- fusion_bench/utils/set.py +8 -0
- fusion_bench/utils/tensorboard.py +51 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/METADATA +17 -18
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/RECORD +58 -29
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/WHEEL +1 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +28 -0
- fusion_bench_config/method/classification/clip_finetune.yaml +26 -0
- fusion_bench_config/method/clip_finetune.yaml +2 -2
- fusion_bench_config/method/dare/ties_merging.yaml +15 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +4 -0
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +5 -0
- fusion_bench_config/method/opcm/opcm.yaml +12 -0
- fusion_bench_config/method/opcm/task_arithmetic.yaml +12 -0
- fusion_bench_config/method/opcm/ties_merging.yaml +18 -0
- fusion_bench_config/method/opcm/weight_average.yaml +10 -0
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +6 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +18 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/LICENSE +0 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, cast
|
|
8
|
+
|
|
9
|
+
import lightning as L
|
|
10
|
+
import numpy as np
|
|
11
|
+
import torch
|
|
12
|
+
from omegaconf import DictConfig
|
|
13
|
+
from torch import Tensor, nn
|
|
14
|
+
from tqdm.auto import tqdm
|
|
15
|
+
from transformers import CLIPVisionModel
|
|
16
|
+
|
|
17
|
+
from fusion_bench import BaseAlgorithm, BaseModelPool
|
|
18
|
+
from fusion_bench.method.ties_merging.ties_merging_utils import (
|
|
19
|
+
state_dict_to_vector,
|
|
20
|
+
ties_merging,
|
|
21
|
+
vector_to_state_dict,
|
|
22
|
+
)
|
|
23
|
+
from fusion_bench.mixins import LightningFabricMixin
|
|
24
|
+
from fusion_bench.taskpool import CLIPVisionModelTaskPool
|
|
25
|
+
from fusion_bench.utils.json import load_from_json, save_to_json
|
|
26
|
+
from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_sub
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from torch.utils.tensorboard import SummaryWriter
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ContinualTiesMergingForCLIP(BaseAlgorithm, LightningFabricMixin):
    """Continual model merging for CLIP vision models via TIES-Merging.

    Models are merged one at a time: at each step the incoming task vector is
    combined with the task vector of the current merged model using TIES
    (trim, elect sign, disjoint merge), and the result is added back onto the
    merged model scaled by ``scaling_factor``.
    """

    def __init__(
        self,
        scaling_factor: float,
        threshold: float,
        remove_keys: Optional[List[str]] = None,
        merge_func: Literal["sum", "mean", "max"] = "sum",
        shuffle_order: bool = True,
        seed: Optional[int] = None,
        save_on_every_step: bool = True,
        evaluate_on_every_step: bool = False,
        **kwargs,
    ):
        """
        Continual Model Merging via Ties-Merging.

        Args:
            scaling_factor (float): the scaling factor to use.
            threshold (float): the TIES trimming threshold passed as ``reset_thresh``.
            remove_keys (Optional[List[str]]): state-dict keys to exclude from merging.
            merge_func (Literal["sum", "mean", "max"]): aggregation used by TIES merging.
            shuffle_order (bool): whether to shuffle the order of the models.
            seed (Optional[int]): the seed to use.
            save_on_every_step (bool): whether to save the merged model on every step.
            evaluate_on_every_step (bool): whether to evaluate the merged model on every step.
        """
        self.scaling_factor = scaling_factor
        self.threshold = threshold
        self.remove_keys = remove_keys if remove_keys is not None else []
        self.merge_func = merge_func
        self.shuffle_order = shuffle_order
        self.seed = seed
        self.save_on_every_step = save_on_every_step
        self.evaluate_on_every_step = evaluate_on_every_step
        super().__init__(**kwargs)

    @torch.no_grad()
    def run(self, modelpool: BaseModelPool):
        """Merge the models in ``modelpool`` sequentially and return the merged model."""
        if self.seed is not None:
            L.seed_everything(self.seed)

        # Bug fix: copy the name list before shuffling — ``random.shuffle`` is
        # in-place and would otherwise mutate the pool's own ``model_names``.
        model_names = list(modelpool.model_names)
        if self.shuffle_order:
            random.shuffle(model_names)

        self.taskpool = cast(CLIPVisionModelTaskPool, self._program.taskpool)
        # Configuration for the test datasets, kept so the taskpool can be
        # re-subset on every step without losing the full configuration.
        self._test_datasets = deepcopy(self.taskpool._test_datasets)

        # log the (possibly shuffled) model order
        if self.log_dir is not None:
            save_to_json(model_names, Path(self.log_dir) / "model_names.json")
            tensorboard_summarywriter: "SummaryWriter" = self.tensorboard_summarywriter
            tensorboard_summarywriter.add_text(
                "global/model_names", str(model_names), global_step=0
            )

        # start from the pretrained model
        pretrained_model = modelpool.load_pretrained_model()
        merged_model = deepcopy(pretrained_model)

        for model_idx, model_name in tqdm(
            enumerate(model_names), desc="Processing models"
        ):
            task_model = modelpool.load_model(model_name)

            task_vector = state_dict_sub(
                task_model.state_dict(),
                pretrained_model.state_dict(),
            )
            if model_idx == 0:
                # if is the first model, the merged task vector is equal to the task vector
                ties_merging_state_dict = task_vector
            else:
                # if is not the first model, we need to merge the task vector with the previous merged task vector
                merged_tv = state_dict_sub(
                    merged_model.state_dict(),
                    pretrained_model.state_dict(),
                )
                tv_flat_checks = torch.vstack(
                    [
                        state_dict_to_vector(merged_tv, remove_keys=self.remove_keys),
                        state_dict_to_vector(task_vector, remove_keys=self.remove_keys),
                    ]
                )
                # perform the TIES merging
                ties_merging_tv = ties_merging(
                    tv_flat_checks,
                    reset_thresh=self.threshold,
                    merge_func=self.merge_func,
                )
                # convert the merged task vector back to a state dict
                ties_merging_state_dict = vector_to_state_dict(
                    ties_merging_tv,
                    merged_model.state_dict(),
                    remove_keys=self.remove_keys,
                )

            # write the scaled merged task vector back (trainable params only)
            for param_name, param in task_model.named_parameters():
                if not param.requires_grad:
                    continue

                merged_param = merged_model.get_parameter(param_name)
                new_param = (
                    merged_param
                    + self.scaling_factor * ties_merging_state_dict[param_name]
                )
                merged_model.get_parameter(param_name).data = new_param

            if self.save_on_every_step:
                self.save_merged_model(merged_model, model_idx)

            if self.evaluate_on_every_step:
                # evaluate on all tasks seen so far
                self.taskpool._is_setup = False
                self.taskpool._test_datasets = DictConfig(
                    {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
                )
                report = self.taskpool.evaluate(deepcopy(merged_model))
                save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")

        return merged_model

    def save_merged_model(self, merged_model: CLIPVisionModel, step: int):
        """Save the merged model's state dict to ``<log_dir>/checkpoints/model_<step>.pth``."""
        os.makedirs(Path(self.log_dir) / "checkpoints", exist_ok=True)
        torch.save(
            merged_model.state_dict(),
            Path(self.log_dir) / "checkpoints" / f"model_{step}.pth",
        )
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import Tuple
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
from torch import Tensor, nn
|
|
5
|
+
|
|
6
|
+
from fusion_bench.utils.parameters import state_dict_to_vector
|
|
7
|
+
from fusion_bench.utils.state_dict_arithmetic import state_dict_sub
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _svd(w: Tensor, full_matrices=True) -> Tuple[Tensor, Tensor, Tensor]:
|
|
11
|
+
"""
|
|
12
|
+
Perform Singular Value Decomposition (SVD) on a tensor.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
w (Tensor): The input tensor.
|
|
16
|
+
full_matrices (bool): Whether to compute the full-sized U and V matrices.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
Tuple[Tensor, Tensor, Tensor]: The U, S, and V matrices from SVD.
|
|
20
|
+
"""
|
|
21
|
+
u, s, vh = torch.linalg.svd(
|
|
22
|
+
w, full_matrices=full_matrices, driver="gesvd" if w.is_cuda else None
|
|
23
|
+
)
|
|
24
|
+
v = vh.T
|
|
25
|
+
return u, s, v
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def svd(
    w: Tensor, full_matrices=True, accelerator=None
) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Perform SVD on a tensor, optionally using a specified accelerator.

    Args:
        w (Tensor): The input tensor.
        full_matrices (bool): Whether to compute the full-sized U and V matrices.
        accelerator (str): The device to perform the computation on. If None,
            the decomposition runs on the tensor's current device.

    Returns:
        Tuple[Tensor, Tensor, Tensor]: The U, S, and V matrices from SVD,
        moved back to the input tensor's original device.
    """
    if accelerator is None:
        return _svd(w, full_matrices=full_matrices)
    original_device = w.device
    w = w.to(accelerator)
    # Bug fix: forward ``full_matrices`` — previously the accelerator path
    # called ``_svd(w)`` and silently ignored the argument, always computing
    # the full decomposition.
    u, s, v = _svd(w, full_matrices=full_matrices)
    return u.to(original_device), s.to(original_device), v.to(original_device)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def frobenius_inner_product(w1: Tensor, w2: Tensor) -> Tensor:
    """Return the Frobenius inner product ``trace(w1^T @ w2)`` of two matrices."""
    gram = w1.T @ w2
    return torch.trace(gram)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def is_leaf_module(module: nn.Module) -> bool:
    """Return True iff ``module`` has no child submodules."""
    return next(iter(module.children()), None) is None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_task_vector_norm(model: nn.Module, pretrained_model: nn.Module) -> Tensor:
    """
    Get the vector norm of the task model.

    Args:
        model (nn.Module): The task model.
        pretrained_model (nn.Module): The pretrained model.

    Returns:
        Tensor: The vector norm of the task model.
    """
    # task vector = fine-tuned weights minus pretrained weights
    task_vector = state_dict_sub(model.state_dict(), pretrained_model.state_dict())
    flat = state_dict_to_vector(task_vector)
    return torch.linalg.norm(flat)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, cast
|
|
8
|
+
|
|
9
|
+
import lightning as L
|
|
10
|
+
import numpy as np
|
|
11
|
+
import torch
|
|
12
|
+
from omegaconf import DictConfig
|
|
13
|
+
from torch import Tensor, nn
|
|
14
|
+
from tqdm.auto import tqdm
|
|
15
|
+
from transformers import CLIPVisionModel
|
|
16
|
+
|
|
17
|
+
from fusion_bench import BaseAlgorithm, BaseModelPool
|
|
18
|
+
from fusion_bench.mixins import LightningFabricMixin
|
|
19
|
+
from fusion_bench.taskpool import CLIPVisionModelTaskPool
|
|
20
|
+
from fusion_bench.utils.json import load_from_json, save_to_json
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from torch.utils.tensorboard import SummaryWriter
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ContinualWeightAverageForCLIP(
    BaseAlgorithm,
    LightningFabricMixin,
):
    """Continual model merging for CLIP vision models via running weight average.

    Models are merged one at a time; after step ``k`` the merged model holds
    the uniform average of the first ``k + 1`` task models' parameters.
    """

    def __init__(
        self,
        shuffle_order: bool = True,
        seed: Optional[int] = None,
        save_on_every_step: bool = True,
        evaluate_on_every_step: bool = False,
        **kwargs,
    ):
        """
        Continual Model Merging via Weight Average.

        Args:
            shuffle_order (bool): whether to shuffle the order of the models.
            seed (Optional[int]): the seed to use.
            save_on_every_step (bool): whether to save the merged model on every step.
            evaluate_on_every_step (bool): whether to evaluate the merged model on every step.
        """
        self.shuffle_order = shuffle_order
        self.seed = seed
        self.save_on_every_step = save_on_every_step
        self.evaluate_on_every_step = evaluate_on_every_step
        super().__init__(**kwargs)

    def run(self, modelpool: BaseModelPool):
        """Average the models in ``modelpool`` sequentially and return the merged model."""
        if self.seed is not None:
            L.seed_everything(self.seed)

        # Bug fix: copy the name list before shuffling — ``random.shuffle`` is
        # in-place and would otherwise mutate the pool's own ``model_names``.
        model_names = list(modelpool.model_names)
        if self.shuffle_order:
            random.shuffle(model_names)

        self.taskpool = cast(CLIPVisionModelTaskPool, self._program.taskpool)
        # Configuration for the test datasets, kept so the taskpool can be
        # re-subset on every step without losing the full configuration.
        self._test_datasets = deepcopy(self.taskpool._test_datasets)

        # log the (possibly shuffled) model order
        if self.log_dir is not None:
            save_to_json(model_names, Path(self.log_dir) / "model_names.json")
            tensorboard_summarywriter: "SummaryWriter" = self.tensorboard_summarywriter
            tensorboard_summarywriter.add_text(
                "global/model_names", str(model_names), global_step=0
            )

        # initialize the running average with the first model
        merged_model = modelpool.load_model(model_names[0])

        if self.evaluate_on_every_step:
            self.taskpool._is_setup = False
            self.taskpool._test_datasets = DictConfig(
                {model_names[0]: self._test_datasets[model_names[0]]}
            )
            report = self.taskpool.evaluate(deepcopy(merged_model))
            save_to_json(report, Path(self.log_dir) / "report_0.json")

        if self.save_on_every_step:
            self.save_merged_model(merged_model, 0)

        for model_idx, model_name in tqdm(
            enumerate(model_names[1:]), desc="Processing models"
        ):
            # enumerate starts at 0 but this is the (idx + 1)-th model overall
            model_idx += 1
            task_model = modelpool.load_model(model_name)

            for param_name, param in task_model.named_parameters():
                if not param.requires_grad:
                    continue

                task_param = param
                merged_param = merged_model.get_parameter(param_name)

                # incremental mean: avg_{k+1} = (avg_k * k + x_{k+1}) / (k + 1)
                new_param = (merged_param * model_idx + task_param) / (model_idx + 1)
                merged_model.get_parameter(param_name).data = new_param

            if self.save_on_every_step:
                self.save_merged_model(merged_model, model_idx)

            if self.evaluate_on_every_step:
                # evaluate on all tasks seen so far
                self.taskpool._is_setup = False
                self.taskpool._test_datasets = DictConfig(
                    {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
                )
                report = self.taskpool.evaluate(deepcopy(merged_model))
                save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")

        return merged_model

    def save_merged_model(self, merged_model: CLIPVisionModel, step: int):
        """Save the merged model via ``save_pretrained`` under ``<log_dir>/checkpoints``."""
        os.makedirs(Path(self.log_dir) / "checkpoints", exist_ok=True)
        merged_model.save_pretrained(
            Path(self.log_dir) / "checkpoints" / f"merged_model_{step}"
        )
|
|
@@ -51,7 +51,7 @@ class SlerpMergeAlgorithm(BaseAlgorithm):
|
|
|
51
51
|
General purpose implementation of Slerp (Spherical Linear Interpolation) for PyTorch models.
|
|
52
52
|
"""
|
|
53
53
|
|
|
54
|
-
_config_mapping = BaseAlgorithm._config_mapping
|
|
54
|
+
_config_mapping = BaseAlgorithm._config_mapping | {
|
|
55
55
|
"t": "t",
|
|
56
56
|
"DOT_THRESHOLD": "DOT_THRESHOLD",
|
|
57
57
|
"epsilon": "epsilon",
|
|
@@ -9,15 +9,20 @@ fusion_bench \
|
|
|
9
9
|
```
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
from typing import List, Optional
|
|
12
|
+
from typing import List, Optional, Union, Iterable
|
|
13
13
|
|
|
14
14
|
import torch
|
|
15
15
|
from torch import Tensor, nn
|
|
16
|
+
from omegaconf import ListConfig
|
|
16
17
|
|
|
17
18
|
from fusion_bench import BaseAlgorithm
|
|
18
19
|
from fusion_bench.mixins import LightningFabricMixin
|
|
19
20
|
from fusion_bench.utils import timeit_context
|
|
20
|
-
from fusion_bench.utils.state_dict_arithmetic import
|
|
21
|
+
from fusion_bench.utils.state_dict_arithmetic import (
|
|
22
|
+
state_dict_add,
|
|
23
|
+
state_dict_sub,
|
|
24
|
+
state_dict_mul,
|
|
25
|
+
)
|
|
21
26
|
from fusion_bench.utils.type import StateDictType
|
|
22
27
|
|
|
23
28
|
from .utils import (
|
|
@@ -33,9 +38,11 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):
|
|
|
33
38
|
|
|
34
39
|
def __init__(
|
|
35
40
|
self,
|
|
41
|
+
alpha: Union[float, Iterable[float]] = None,
|
|
36
42
|
remove_keys: Optional[List[str]] = None,
|
|
37
43
|
**kwargs,
|
|
38
44
|
):
|
|
45
|
+
self.alpha = alpha
|
|
39
46
|
self.remove_keys = remove_keys if remove_keys is not None else []
|
|
40
47
|
super().__init__(**kwargs)
|
|
41
48
|
|
|
@@ -50,6 +57,14 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):
|
|
|
50
57
|
|
|
51
58
|
with timeit_context("Flattening out Checkpoints"):
|
|
52
59
|
task_vectors = [state_dict_sub(check, ptm_check) for check in ft_checks]
|
|
60
|
+
if isinstance(self.alpha, Iterable):
|
|
61
|
+
assert len(self.alpha) == len(
|
|
62
|
+
task_vectors
|
|
63
|
+
), "Alpha and task vectors must have the same length"
|
|
64
|
+
task_vectors = [
|
|
65
|
+
state_dict_mul(state_dict=tv, scalar=alpha)
|
|
66
|
+
for alpha, tv in zip(self.alpha, task_vectors)
|
|
67
|
+
]
|
|
53
68
|
|
|
54
69
|
new_merged_tv = TSVM_utils.compute_and_sum_svd_mem_reduction(
|
|
55
70
|
task_vectors,
|
|
@@ -57,6 +72,11 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):
|
|
|
57
72
|
accelerator=self.fabric.device,
|
|
58
73
|
)
|
|
59
74
|
|
|
75
|
+
# If alpha is a float, we need to scale the new merged task vector by alpha
|
|
76
|
+
if self.alpha is not None and isinstance(self.alpha, float):
|
|
77
|
+
print(f"Scaling new merged task vector by alpha: {self.alpha}")
|
|
78
|
+
new_merged_tv = state_dict_mul(state_dict=new_merged_tv, scalar=self.alpha)
|
|
79
|
+
|
|
60
80
|
pretrained_model.load_state_dict(
|
|
61
81
|
state_dict_add(new_merged_tv, pretrained_model.state_dict())
|
|
62
82
|
)
|