fusion-bench 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published in that registry.
- fusion_bench/__main__.py +4 -0
- fusion_bench/dataset/fer2013.py +1 -0
- fusion_bench/method/__init__.py +26 -4
- fusion_bench/method/classification/__init__.py +1 -0
- fusion_bench/method/classification/clip_finetune.py +1 -3
- fusion_bench/method/classification/continual_clip_finetune.py +297 -0
- fusion_bench/method/dare/__init__.py +1 -0
- fusion_bench/method/dare/task_arithmetic.py +14 -7
- fusion_bench/method/dare/ties_merging.py +100 -0
- fusion_bench/method/isotropic_merging/__init__.py +15 -0
- fusion_bench/method/isotropic_merging/iso.py +114 -0
- fusion_bench/method/isotropic_merging/iso_utils.py +176 -0
- fusion_bench/method/opcm/__init__.py +4 -0
- fusion_bench/method/opcm/opcm.py +277 -0
- fusion_bench/method/opcm/task_arithmetic.py +115 -0
- fusion_bench/method/opcm/ties_merging.py +156 -0
- fusion_bench/method/opcm/utils.py +73 -0
- fusion_bench/method/opcm/weight_average.py +120 -0
- fusion_bench/method/slerp/slerp.py +1 -1
- fusion_bench/method/task_singular_vector/TSVM.py +22 -2
- fusion_bench/method/task_singular_vector/utils/TSVM_utils.py +91 -93
- fusion_bench/method/ties_merging/ties_merging.py +10 -0
- fusion_bench/metrics/continual_learning/backward_transfer.py +22 -0
- fusion_bench/mixins/clip_classification.py +4 -1
- fusion_bench/programs/fabric_fusion_program.py +22 -11
- fusion_bench/scripts/cli.py +1 -0
- fusion_bench/taskpool/base_pool.py +1 -1
- fusion_bench/taskpool/clip_vision/taskpool.py +12 -7
- fusion_bench/utils/__init__.py +2 -1
- fusion_bench/utils/dict.py +43 -0
- fusion_bench/utils/expr.py +90 -0
- fusion_bench/utils/fabric.py +17 -0
- fusion_bench/utils/instantiate.py +7 -1
- fusion_bench/utils/json.py +30 -0
- fusion_bench/utils/parameters.py +27 -7
- fusion_bench/utils/path.py +15 -0
- fusion_bench/utils/plot/color_data.py +1726 -0
- fusion_bench/utils/rich_utils.py +15 -0
- fusion_bench/utils/set.py +8 -0
- fusion_bench/utils/tensorboard.py +51 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/METADATA +17 -18
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/RECORD +58 -29
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/WHEEL +1 -1
- fusion_bench_config/method/classification/clip_continual_finetune.yaml +28 -0
- fusion_bench_config/method/classification/clip_finetune.yaml +26 -0
- fusion_bench_config/method/clip_finetune.yaml +2 -2
- fusion_bench_config/method/dare/ties_merging.yaml +15 -0
- fusion_bench_config/method/isotropic_merging/iso_c.yaml +4 -0
- fusion_bench_config/method/isotropic_merging/iso_cts.yaml +5 -0
- fusion_bench_config/method/opcm/opcm.yaml +12 -0
- fusion_bench_config/method/opcm/task_arithmetic.yaml +12 -0
- fusion_bench_config/method/opcm/ties_merging.yaml +18 -0
- fusion_bench_config/method/opcm/weight_average.yaml +10 -0
- fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml +6 -0
- fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +18 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/LICENSE +0 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/entry_points.txt +0 -0
- {fusion_bench-0.2.8.dist-info → fusion_bench-0.2.10.dist-info}/top_level.txt +0 -0
fusion_bench/__main__.py
ADDED
fusion_bench/dataset/fer2013.py
CHANGED
fusion_bench/method/__init__.py
CHANGED
@@ -9,7 +9,10 @@ _import_structure = {
     "base_algorithm": ["BaseModelFusionAlgorithm", "BaseAlgorithm"],
     "dummy": ["DummyAlgorithm"],
     # single task learning (fine-tuning)
-    "classification": [
+    "classification": [
+        "ImageClassificationFineTuningForCLIP",
+        "ContinualImageClassificationFineTuningForCLIP",
+    ],
     "lm_finetune": ["FullFinetuneSFT", "PeftFinetuneSFT", "BradleyTerryRewardModeling"],
     # analysis
     "analysis": ["TaskVectorCosSimilarity", "TaskVectorViolinPlot"],
@@ -27,11 +30,12 @@ _import_structure = {
         "TaskArithmeticForLlama",
         "LinearInterpolationAlgorithm",
     ],
+    "slerp": ["SlerpMergeAlgorithm"],
     "simple_average": ["SimpleAverageAlgorithm"],
     "weighted_average": ["WeightedAverageAlgorithm", "WeightedAverageForLLama"],
     "task_arithmetic": ["TaskArithmeticAlgorithm"],
     "ties_merging": ["TiesMergingAlgorithm"],
-    "dare": ["DareSimpleAverage", "DareTaskArithmetic"],
+    "dare": ["DareSimpleAverage", "DareTaskArithmetic", "DareTiesMerging"],
     "fisher_merging": [
         "FisherMergingForCLIPVisionModel",
         "FisherMergingAlgorithmForGPT2",
@@ -50,6 +54,13 @@ _import_structure = {
     ],
     "ada_svd": ["AdaSVDMergingForCLIPVisionModel"],
     "task_singular_vector": ["TaskSingularVectorMerging"],
+    "isotropic_merging": [
+        "ISO_C_Merge",  # alias
+        "ISO_CTS_Merge",  # alias
+        "IsotropicMergingInCommonAndTaskSubspace",
+        "IsotropicMergingInCommonSubspace",
+    ],
+    "opcm": ["OPCMForCLIP"],
     # plug-and-play model merging methods
     "concrete_subspace": [
         "ConcreteTaskArithmeticAlgorithmForCLIP",
@@ -96,13 +107,16 @@ if TYPE_CHECKING:
     from .adamerging import *
     from .analysis import TaskVectorCosSimilarity, TaskVectorViolinPlot
     from .base_algorithm import BaseAlgorithm, BaseModelFusionAlgorithm
-    from .classification import
+    from .classification import (
+        ContinualImageClassificationFineTuningForCLIP,
+        ImageClassificationFineTuningForCLIP,
+    )
     from .concrete_subspace import (
         ConcreteLayerWiseAdaMergingForCLIP,
         ConcreteTaskArithmeticAlgorithmForCLIP,
         ConcreteTaskWiseAdaMergingForCLIP,
     )
-    from .dare import DareSimpleAverage, DareTaskArithmetic
+    from .dare import DareSimpleAverage, DareTaskArithmetic, DareTiesMerging
     from .dawe import DataAdaptiveWeightEnsemblingForCLIP
     from .depth_upscaling import DepthUpscalingAlgorithm, DepthUpscalingForLlama
     from .dummy import DummyAlgorithm
@@ -112,6 +126,12 @@ if TYPE_CHECKING:
         WeightedEnsembleAlgorithm,
     )
     from .fisher_merging import FisherMergingForCLIPVisionModel
+    from .isotropic_merging import (
+        ISO_C_Merge,
+        ISO_CTS_Merge,
+        IsotropicMergingInCommonAndTaskSubspace,
+        IsotropicMergingInCommonSubspace,
+    )
     from .linear import (
         ExPOAlgorithm,
         ExPOAlgorithmForLlama,
@@ -127,6 +147,7 @@ if TYPE_CHECKING:
         MixtralUpscalingAlgorithm,
     )
     from .model_recombination import ModelRecombinationAlgorithm
+    from .opcm import OPCMForCLIP
     from .pruning import (
         MagnitudeDiffPruningAlgorithm,
         MagnitudePruningForLlama,
@@ -140,6 +161,7 @@ if TYPE_CHECKING:
     from .rankone_moe import CLIPRankOneMoEAlgorithm, RankOneMoEAlgorithm
     from .regmean import RegMeanAlgorithmForCLIP, RegMeanAlgorithmForGPT2
     from .simple_average import SimpleAverageAlgorithm
+    from .slerp import SlerpMergeAlgorithm
     from .smile_upscaling import (
         SingularProjectionMergingAlgorithm,
         SmileUpscalingAlgorithm,
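The hunks above register the new algorithms (continual CLIP fine-tuning, DARE-TIES, SLERP, isotropic merging, and OPCM) in the module's `_import_structure` table and mirror them in the `if TYPE_CHECKING:` block, so they resolve lazily on first use. A minimal sketch of the resulting import surface (names taken directly from the diff; the calling context is assumed):

from fusion_bench.method import (
    ContinualImageClassificationFineTuningForCLIP,
    DareTiesMerging,
    ISO_C_Merge,  # alias of IsotropicMergingInCommonSubspace
    ISO_CTS_Merge,  # alias of IsotropicMergingInCommonAndTaskSubspace
    OPCMForCLIP,
    SlerpMergeAlgorithm,
)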
fusion_bench/method/classification/clip_finetune.py
CHANGED
@@ -184,9 +184,7 @@ class ImageClassificationFineTuningForCLIP(
                 self.save_model(classifier, save_path)

         if config.state_dict_save_path is not None:
-            self.save_model(
-                classifier, config.state_dict_save_path, trainable_only=True
-            )
+            self.save_model(classifier, config.state_dict_save_path)
         self.print_profile_summary()
         return classifier.clip_model.vision_model

fusion_bench/method/classification/continual_clip_finetune.py
ADDED
@@ -0,0 +1,297 @@
+import os
+import random
+import time
+from copy import deepcopy
+from typing import Optional, Tuple, cast
+
+import lightning as L
+import torch
+from omegaconf import DictConfig, OmegaConf
+from peft import LoraConfig, PeftModel, get_peft_model
+from peft.tuners.lora import LoraLayer
+from safetensors.torch import save_file
+from torch import nn
+from torch.utils.data import DataLoader
+from tqdm.auto import tqdm
+from transformers import CLIPModel, CLIPProcessor, CLIPVisionModel
+from transformers.models.clip.modeling_clip import CLIPVisionTransformer
+
+from fusion_bench import BaseAlgorithm, print_parameters
+from fusion_bench.compat.modelpool import to_modelpool
+from fusion_bench.dataset.clip_dataset import CLIPDataset
+from fusion_bench.mixins import CLIPClassificationMixin
+from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
+from fusion_bench.modelpool import CLIPVisionModelPool
+from fusion_bench.models.hf_clip import HFCLIPClassifier
+from fusion_bench.models.linearized.linearized_model_utils import LinearizedModelWraper
+from fusion_bench.taskpool import CLIPVisionModelTaskPool
+from fusion_bench.utils.data import InfiniteDataLoader
+from fusion_bench.utils.fabric import seed_everything_by_time
+from fusion_bench.utils.json import load_from_json, save_to_json
+
+
+class ContinualImageClassificationFineTuningForCLIP(
+    CLIPClassificationMixin,
+    SimpleProfilerMixin,
+    BaseAlgorithm,
+):
+    # attributes to configuration keys mapping
+    _config_mapping = BaseAlgorithm._config_mapping | {
+        "seed": "seed",
+        "shuffle_order": "shuffle_order",
+        "learning_rate": "learning_rate",
+        "weight_decay": "weight_decay",
+        "num_steps": "num_steps",
+        "batch_size": "batch_size",
+        "num_workers": "num_workers",
+        "save_interval": "save_interval",
+        "state_dict_load_path": "state_dict_load_path",
+        "state_dict_save_path": "state_dict_save_path",
+        "skip_training": "skip_training",
+        "use_lora": "use_lora",
+        "lora_config": "lora_config",
+    }
+
+    def __init__(
+        self,
+        seed: int = 42,
+        shuffle_order: bool = True,
+        learning_rate: float = 1e-5,
+        weight_decay: float = 0,
+        num_steps: int = 4000,
+        batch_size: int = 128,
+        num_workers: int = 16,
+        save_interval: int = 500,
+        state_dict_load_path: Optional[str] = None,
+        state_dict_save_path: Optional[str] = None,
+        skip_training: bool = False,
+        use_lora: bool = False,
+        lora_config: Optional[LoraConfig] = None,
+    ):
+        self.seed = seed
+        self.shuffle_order = shuffle_order
+        self.learning_rate = learning_rate
+        self.weight_decay = weight_decay
+        self.num_steps = num_steps
+        self.batch_size = batch_size
+        self.num_workers = num_workers
+        self.save_interval = save_interval
+        self.state_dict_load_path = state_dict_load_path
+        self.state_dict_save_path = state_dict_save_path
+        self.skip_training = skip_training
+        self.use_lora = use_lora
+        self.lora_config = lora_config
+
+    def run(self, modelpool: CLIPVisionModelPool):
+        self.modelpool = to_modelpool(modelpool)
+        config = self.config
+        self.log_hyperparams(config, filename="method_config.yaml")
+        self.finetune_method = "fine-tune"
+
+        if self.seed is not None:
+            L.seed_everything(self.seed)
+        else:
+            seed_everything_by_time(self.fabric)
+
+        task_names = list(modelpool.train_dataset_names)
+        if self.shuffle_order:
+            random.shuffle(task_names)
+        if self.fabric.is_global_zero:
+            save_to_json(task_names, os.path.join(self.log_dir, "task_names.json"))
+
+        if self._program.taskpool is not None and isinstance(
+            self._program.taskpool, CLIPVisionModelTaskPool
+        ):
+            has_taskpool = True
+            taskpool = cast(CLIPVisionModelTaskPool, self._program.taskpool)
+            test_datasets = taskpool._test_datasets
+        else:
+            has_taskpool = False
+
+        with self.profile("setup model and optimizer"):
+            processor, classifier, optimizer, lr_scheduler = self.setup_model()
+
+            if self.state_dict_load_path is not None:
+                self.fabric.load(
+                    self.state_dict_load_path,
+                    {"vision_model": classifier.clip_model.vision_model},
+                )
+                if self.skip_training:
+                    return classifier.clip_model.vision_model
+
+            self.setup_zero_shot_classification_head(
+                clip_processor=processor,
+                clip_model=classifier.clip_model,
+                task_names=task_names,
+            )
+
+            init_optimizer_state_dict = optimizer.state_dict()
+            init_lr_scheduler_state_dict = lr_scheduler.state_dict()
+            self.fabric.setup(classifier, optimizer)
+
+        with self.profile("setup data"):
+            train_datasets = [
+                CLIPDataset(modelpool.load_train_dataset(task_name), processor)
+                for task_name in task_names
+            ]
+            train_dataloaders = [
+                DataLoader(
+                    dataset,
+                    shuffle=True,
+                    batch_size=self.batch_size,
+                    num_workers=self.num_workers,
+                )
+                for dataset in train_datasets
+            ]
+            train_dataloaders = self.fabric.setup_dataloaders(*train_dataloaders)
+            if not isinstance(train_dataloaders, (list, tuple)):
+                train_dataloaders = [train_dataloaders]
+            train_dataloader_iters = [
+                iter(InfiniteDataLoader(loader)) for loader in train_dataloaders
+            ]
+
+        # continual train
+        for task_idx, task_name in tqdm(
+            enumerate(task_names),
+            dynamic_ncols=True,
+            disable=not self.fabric.is_global_zero,
+        ):
+            train_dataloader_iter = train_dataloader_iters[task_idx]
+
+            # reset optimizer and lr scheduler
+            print("reset optimizer and lr scheduler")
+            optimizer.load_state_dict(init_optimizer_state_dict)
+            lr_scheduler.load_state_dict(init_lr_scheduler_state_dict)
+
+            for step_idx in tqdm(
+                range(self.num_steps),
+                desc=f"continual fine-tune on {task_name}",
+                disable=not self.fabric.is_global_zero,
+                dynamic_ncols=True,
+                leave=False,
+            ):
+                optimizer.zero_grad()
+                loss = 0
+                with self.profile("data loading"):
+                    batch = next(train_dataloader_iter)
+                    images, labels = batch
+                with self.profile("forward"):
+                    classifier.zeroshot_weights = self.zeroshot_weights[task_name]
+                    logits = classifier(images)
+                    assert (
+                        labels.max() < logits.shape[1]
+                    ), f"for task {task_name}, labels.max() = {labels.max()}, logits.shape[1] = {logits.shape[1]}"
+                    loss = loss + nn.functional.cross_entropy(logits, labels)
+
+                with self.profile("backward"):
+                    self.fabric.backward(loss)
+                with self.profile("optimizer step"):
+                    optimizer.step()
+                    lr_scheduler.step()
+
+                metrics = {"train/loss": loss}
+                self.fabric.log_dict(metrics, step=step_idx)
+
+                if (step_idx + 1) % self.save_interval == 0:
+                    save_path = os.path.join(
+                        self.log_dir,
+                        "checkpoints",
+                        f"task={task_idx}_step={step_idx}.ckpt",
+                    )
+                    self.save_model(classifier, save_path)
+
+            if has_taskpool:
+                taskpool._is_setup = False
+                taskpool._test_datasets = DictConfig(
+                    {t: test_datasets[t] for t in task_names[: task_idx + 1]}
+                )
+                eval_report = taskpool.evaluate(
+                    deepcopy(classifier.clip_model.vision_model),
+                    name=task_name,
+                )
+                if self.fabric.is_global_zero:
+                    save_to_json(
+                        eval_report,
+                        os.path.join(self.log_dir, f"results_{task_idx}.json"),
+                    )
+
+        if self.state_dict_save_path is not None:
+            self.save_model(classifier, self.state_dict_save_path)
+        self.print_profile_summary()
+        return classifier.clip_model.vision_model
+
+    def save_model(
+        self,
+        model: HFCLIPClassifier | CLIPModel | CLIPVisionModel | CLIPVisionTransformer,
+        save_path: str,
+    ):
+        """
+        Save the vision model to the specified path.
+
+        Args:
+            model (Union[HFCLIPClassifier, CLIPModel, CLIPVisionModel, CLIPVisionTransformer]): The model to save.
+            save_path (str): The path to save the model.
+        """
+        if isinstance(model, HFCLIPClassifier):
+            vision_model = model.clip_model.vision_model
+        elif isinstance(model, CLIPModel):
+            vision_model = model.vision_model
+        elif isinstance(model, CLIPVisionModel):
+            vision_model = model.vision_model
+        elif isinstance(model, CLIPVisionTransformer):
+            vision_model = model
+        else:
+            raise ValueError(f"Unsupported model type: {type(model)}")
+
+        save_dir = os.path.dirname(save_path)
+        if save_dir and not os.path.exists(save_dir):
+            os.makedirs(save_dir, exist_ok=True)
+        self.fabric.save(save_path, {"vision_model": vision_model})
+
+    def setup_model(self):
+        """
+        Sets up the model, optimizer, and learning rate scheduler.
+
+        This method initializes the CLIP model, applies LoRA if specified, and configures the optimizer and learning rate scheduler.
+
+        Returns:
+            Tuple: A tuple containing the processor, classifier, optimizer, and learning rate scheduler.
+        """
+        config = self.config
+        modelpool = self.modelpool
+
+        clip_model: CLIPModel = modelpool.load_clip_model("_pretrained_")
+        processor = modelpool.load_processor()
+
+        self.finetune_method = "full fine-tune"
+        if self.use_lora:
+            self.finetune_method = "lora fine-tune"
+            lora_config = LoraConfig(
+                **OmegaConf.to_container(
+                    self.lora_config, resolve=True, enum_to_str=True
+                )
+            )
+            clip_model.vision_model = get_peft_model(
+                clip_model.vision_model, lora_config
+            )
+
+        classifier = HFCLIPClassifier(clip_model, processor=processor)
+
+        if self.fabric.is_global_zero:
+            print("=== Model Summary (For Vision Model Only) ===")
+            print_parameters(classifier.clip_model.vision_model)
+        # configure optimizers
+        optimizer = torch.optim.Adam(
+            [
+                p
+                for p in classifier.clip_model.vision_model.parameters()
+                if p.requires_grad
+            ],
+            lr=self.learning_rate,
+            weight_decay=self.weight_decay,
+        )
+        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+            optimizer=optimizer, T_max=self.num_steps
+        )
+
+        return processor, classifier, optimizer, lr_scheduler
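The constructor above documents the trainer's tunable settings and their defaults (the packaged `clip_continual_finetune.yaml` config listed earlier presumably maps onto them). Below is a hedged sketch of driving the class from Python; the model pool and the Fabric/taskpool wiring that `run` relies on (`self.fabric`, `self._program.taskpool`, `self.log_dir`) are assumptions that normally come from the fusion_bench program machinery rather than manual setup:

# Hypothetical usage sketch: argument names and defaults come from __init__ above;
# `modelpool` is assumed to be an already-configured CLIPVisionModelPool.
algorithm = ContinualImageClassificationFineTuningForCLIP(
    seed=42,
    shuffle_order=True,
    learning_rate=1e-5,
    num_steps=4000,
    batch_size=128,
    save_interval=500,
    use_lora=False,
)
# vision_model = algorithm.run(modelpool)  # returns the final CLIP vision tower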
fusion_bench/method/dare/task_arithmetic.py
CHANGED
@@ -33,21 +33,28 @@ class DareTaskArithmetic(BaseAlgorithm):
         self.rescale = rescale
         super().__init__(**kwargs)

+    def _load_task_vector(
+        self,
+        modelpool: BaseModelPool,
+        model_name: str,
+        pretrained_model: nn.Module,
+    ):
+        finetuned_model = modelpool.load_model(model_name)
+        task_vector = module_sub_(finetuned_model, pretrained_model)
+        return task_vector
+
     @torch.no_grad()
     def run(self, modelpool: BaseModelPool):
         assert (
             self.sparsity_ratio >= 0 and self.sparsity_ratio <= 1
         ), "Sparsity ratio must be between 0 and 1"
         pretrained_model = modelpool.load_pretrained_model()
-
-
-            for model_name in modelpool.model_names
-        }
+
+        # load task vectors
         task_vectors = {
-            model_name:
-            for model_name in
+            model_name: self._load_task_vector(modelpool, model_name, pretrained_model)
+            for model_name in modelpool.model_names
         }
-        del finetuned_models

         # drop and rescale task vectors
         for model_name, tv in task_vectors.items():
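The refactor above moves task-vector extraction into the `_load_task_vector` helper: each task vector is the parameter-wise difference between a fine-tuned model and the pretrained model, computed by `module_sub_`. A rough illustrative equivalent on plain state dicts (illustration only; the actual `module_sub_` helper operates on modules and is not shown in this diff):

import torch
from torch import nn

def task_vector_state_dict(finetuned: nn.Module, pretrained: nn.Module) -> dict:
    # Illustrative only: task_vector[name] = finetuned[name] - pretrained[name]
    pre = pretrained.state_dict()
    return {name: param - pre[name] for name, param in finetuned.state_dict().items()}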
fusion_bench/method/dare/ties_merging.py
ADDED
@@ -0,0 +1,100 @@
+from typing import Literal
+
+import torch
+from torch import Tensor, nn
+
+from fusion_bench import BaseAlgorithm, BaseModelPool
+from fusion_bench.method.ties_merging.ties_merging_utils import ties_merging
+from fusion_bench.utils.parameters import state_dict_to_vector, vector_to_state_dict
+from fusion_bench.utils.state_dict_arithmetic import state_dict_sum
+
+from .utils import (
+    module_random_drop_,
+    module_sub_,
+    param_random_drop_,
+    trainable_state_dict,
+)
+
+
+class DareTiesMerging(BaseAlgorithm):
+    def __init__(
+        self,
+        # DARE parameters
+        sparsity_ratio: float,
+        only_on_linear_weights: bool,
+        rescale: bool,
+        # Ties merging parameters
+        scaling_factor: float,
+        threshold: int,
+        remove_keys: list[str],
+        merge_func: Literal["sum", "mean", "max"],
+        **kwargs,
+    ):
+        self.sparsity_ratio = sparsity_ratio
+        self.only_on_linear_weights = only_on_linear_weights
+        self.rescale = rescale
+        self.scaling_factor = scaling_factor
+        self.threshold = threshold
+        self.remove_keys = remove_keys
+        self.merge_func = merge_func
+        super().__init__(**kwargs)
+
+    @torch.no_grad()
+    def _load_task_vector(
+        self,
+        modelpool: BaseModelPool,
+        model_name: str,
+        pretrained_model: nn.Module,
+    ):
+        finetuned_model = modelpool.load_model(model_name)
+        task_vector = module_sub_(finetuned_model, pretrained_model)
+        return task_vector
+
+    def run(self, modelpool: BaseModelPool):
+        assert (
+            self.sparsity_ratio >= 0 and self.sparsity_ratio <= 1
+        ), "Sparsity ratio must be between 0 and 1"
+        pretrained_model = modelpool.load_pretrained_model()
+
+        # load task vectors
+        task_vectors = {
+            model_name: self._load_task_vector(modelpool, model_name, pretrained_model)
+            for model_name in modelpool.model_names
+        }
+
+        # drop and rescale task vectors
+        for model_name, tv in task_vectors.items():
+            if self.only_on_linear_weights:
+                for module_name, module in tv.named_modules():
+                    if isinstance(module, nn.Linear):
+                        print(f"pruning model: `{model_name}`, layer: {module_name}.")
+                        param_random_drop_(
+                            module.weight, self.sparsity_ratio, rescale=self.rescale
+                        )
+            else:
+                print(f"pruning model: `{model_name}`")
+                module_random_drop_(tv, self.sparsity_ratio, rescale=self.rescale)
+
+        ptm_check = pretrained_model.state_dict()
+        flat_ptm = state_dict_to_vector(ptm_check, self.remove_keys)
+        tv_flat_checks = torch.vstack(
+            [
+                state_dict_to_vector(check.state_dict(), self.remove_keys)
+                for check in task_vectors.values()
+            ]
+        )
+        del task_vectors
+
+        # Perform TIES Merging
+        merged_tv = ties_merging(
+            tv_flat_checks,
+            reset_thresh=self.threshold,
+            merge_func=self.merge_func,
+        )
+        merged_check = flat_ptm + self.scaling_factor * merged_tv
+        merged_state_dict = vector_to_state_dict(
+            merged_check, ptm_check, remove_keys=self.remove_keys
+        )
+
+        pretrained_model.load_state_dict(merged_state_dict)
+        return pretrained_model
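Both DARE variants call `param_random_drop_` / `module_random_drop_` to randomly zero a fraction of each task vector's entries and optionally rescale the survivors. A minimal sketch of that drop-and-rescale step on a single tensor, assuming the conventional DARE behavior (the real helpers live in `fusion_bench/method/dare/utils.py`, which is not part of this diff):

import torch

def random_drop_(param: torch.Tensor, sparsity_ratio: float, rescale: bool = True) -> torch.Tensor:
    # Assumed behavior: zero out ~`sparsity_ratio` of the entries in place,
    # then rescale the kept entries by 1 / (1 - sparsity_ratio) to preserve the expectation.
    mask = torch.rand_like(param) >= sparsity_ratio
    param.mul_(mask)
    if rescale and sparsity_ratio < 1.0:
        param.div_(1.0 - sparsity_ratio)
    return param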
fusion_bench/method/isotropic_merging/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""
+This module contains the implementation of the Isotropic Merging in Common Subspace (ISO-C) algorithm and Isotropic Merging in Common and Task-Specific Subspaces (Iso-CTS) algorithm.
+Modified from the original implementation: https://github.com/danielm1405/iso-merging
+
+Reference:
+- Daniel Marczak, et al. No Task Left Behind: Isotropic Model Merging with Common and Task-Specific Subspaces. 2025.
+  https://arxiv.org/abs/2502.04959
+"""
+
+from .iso import (
+    ISO_C_Merge,
+    ISO_CTS_Merge,
+    IsotropicMergingInCommonSubspace,
+    IsotropicMergingInCommonAndTaskSubspace,
+)
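Given these exports, a typical invocation constructs one of the two algorithms and runs it over a model pool that provides the pretrained checkpoint plus the fine-tuned models. A hedged sketch with placeholder argument values (the `iso_c.yaml` / `iso_cts.yaml` configs added in this release are not shown here):

from fusion_bench.method.isotropic_merging import ISO_C_Merge, ISO_CTS_Merge

iso_c_algo = ISO_C_Merge(scaling_factor=1.0)  # Iso-C
iso_cts_algo = ISO_CTS_Merge(                 # Iso-CTS
    scaling_factor=1.0,
    common_space_fraction=0.8,
)
# merged_model = iso_c_algo.run(modelpool)  # modelpool: a BaseModelPool with a pretrained entry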
fusion_bench/method/isotropic_merging/iso.py
ADDED
@@ -0,0 +1,114 @@
+from typing import List
+
+import torch
+
+from fusion_bench import BaseAlgorithm, BaseModelPool
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.utils.state_dict_arithmetic import (
+    state_dict_add,
+    state_dict_sub,
+    state_dict_mul,
+)
+
+from .iso_utils import iso_c, iso_cts, check_parameterNamesMatch
+
+
+class IsotropicMergingInCommonSubspace(BaseAlgorithm, LightningFabricMixin):
+    """
+    Isotropic Merging in Common Subspace (Iso-C)
+    """
+
+    def __init__(
+        self,
+        scaling_factor: float,
+        exclude_keys: List[str] = None,
+    ):
+        self.scaling_factor = scaling_factor
+        self.exclude_keys = exclude_keys
+        super().__init__()
+
+    def run(self, modelpool: BaseModelPool):
+        # load the pretrained model and the task vectors of all the finetuned models
+        with torch.no_grad():
+            pretrained_model = modelpool.load_pretrained_model()
+            task_vectors = []
+            for model_name in modelpool.model_names:
+                finetuned_model = modelpool.load_model(model_name)
+                task_vectors.append(
+                    state_dict_sub(
+                        finetuned_model.state_dict(), pretrained_model.state_dict()
+                    )
+                )
+                del finetuned_model  # free memory
+            check_parameterNamesMatch(task_vectors)
+
+        # compute the merged task vector
+        merged_tv = iso_c(
+            task_vectors,
+            accelerator=self.fabric.device,
+            exclude_keys=self.exclude_keys,
+        )
+
+        # merged_parameters = pretrained_parameters + scaling_factor * merged_task_vector
+        pretrained_model.load_state_dict(
+            state_dict_add(
+                pretrained_model.state_dict(),
+                state_dict_mul(merged_tv, self.scaling_factor),
+            )
+        )
+
+        return pretrained_model
+
+
+class IsotropicMergingInCommonAndTaskSubspace(BaseAlgorithm, LightningFabricMixin):
+    """
+    Isotropic Merging in Common and Task-Specific Subspaces (Iso-CTS)
+    """
+
+    def __init__(
+        self,
+        scaling_factor: float,
+        common_space_fraction: float,
+        exclude_keys: List[str] = None,
+    ):
+        self.common_space_fraction = common_space_fraction
+        self.scaling_factor = scaling_factor
+        self.exclude_keys = exclude_keys
+        super().__init__()
+
+    def run(self, modelpool: BaseModelPool):
+        # load the pretrained model and the task vectors of all the finetuned models
+        with torch.no_grad():
+            pretrained_model = modelpool.load_pretrained_model()
+            task_vectors = []
+            for model_name in modelpool.model_names:
+                finetuned_model = modelpool.load_model(model_name)
+                task_vectors.append(
+                    state_dict_sub(
+                        finetuned_model.state_dict(), pretrained_model.state_dict()
+                    )
+                )
+                del finetuned_model  # free memory
+            check_parameterNamesMatch(task_vectors)
+
+        # compute the merged task vector
+        merged_tv = iso_cts(
+            task_vectors,
+            common_space_fraction=self.common_space_fraction,
+            accelerator=self.fabric.device,
+            exclude_keys=self.exclude_keys,
+        )
+
+        # merged_parameters = pretrained_parameters + scaling_factor * merged_task_vector
+        pretrained_model.load_state_dict(
+            state_dict_add(
+                pretrained_model.state_dict(),
+                state_dict_mul(merged_tv, self.scaling_factor),
+            )
+        )
+
+        return pretrained_model
+
+
+ISO_C_Merge = IsotropicMergingInCommonSubspace  # alias
+ISO_CTS_Merge = IsotropicMergingInCommonAndTaskSubspace  # alias
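The actual merging math lives in `iso_c` / `iso_cts` from `iso_utils.py` (added in this release, +176 lines, not expanded in this diff). Based on the paper cited in the module docstring, Iso-C sums the task vectors and then flattens the singular-value spectrum of each 2-D weight delta so all retained directions contribute equally; the sketch below illustrates that idea for a single weight matrix and is an assumption about the approach, not the package's implementation:

import torch

def isotropic_merge_2d(task_vectors: list) -> torch.Tensor:
    # Sketch of Iso-C for one 2-D parameter: sum the per-task deltas,
    # then replace the singular values of the sum with their mean (isotropic spectrum).
    summed = torch.stack(task_vectors).sum(dim=0)
    U, S, Vh = torch.linalg.svd(summed, full_matrices=False)
    return U @ torch.diag(torch.full_like(S, S.mean())) @ Vh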