PyPI - fusion-bench - Versions diffs - 0.2.11__py3-none-any.whl → 0.2.12__py3-none-any.whl - Mend

fusion-bench 0.2.11-py3-none-any.whl → 0.2.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

fusion_bench/compat/method/__init__.py CHANGED Viewed

@@ -20,7 +20,7 @@ class AlgorithmFactory:
         # model merging methods
         "clip_task_wise_adamerging": ".adamerging.clip_task_wise_adamerging.CLIPTaskWiseAdaMergingAlgorithm",
         "clip_layer_wise_adamerging": ".adamerging.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
-        "clip_layer_wise_adamerging_doge_ta": ".DOGE_TA.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
+        "clip_layer_wise_adamerging_doge_ta": ".doge_ta.clip_layer_wise_adamerging.CLIPLayerWiseAdaMergingAlgorithm",
         "singular_projection_merging": "fusion_bench.method.smile_upscaling.singular_projection_merging.SingularProjectionMergingAlgorithm",
         "clip_layer_wise_adamerging_surgery": ".surgery.clip_layer_wise_adamerging_surgery.CLIPLayerWiseAdaMergingSurgeryAlgorithm",
         # plug-and-play model merging methods

fusion_bench/dataset/fer2013.py CHANGED Viewed

@@ -7,7 +7,6 @@ def load_fer2013(path: str = "clip-benchmark/wds_fer2013", split: str = "train")
     dataset = dataset.rename_columns({"jpg": "image", "cls": "label"})
     return dataset
 if __name__ == "__main__":
     dataset = load_fer2013(split="test")
     print(dataset)

fusion_bench/method/__init__.py CHANGED Viewed

@@ -53,7 +53,7 @@ _import_structure = {
         "PWEMoExactParetoOptimalForCLIP",
     ],
     "ada_svd": ["AdaSVDMergingForCLIPVisionModel"],
-    "DOGE_TA": ["DOGE_TA_Algorithm"],
+    "doge_ta": ["DOGE_TA_Algorithm"],
     "task_singular_vector": ["TaskSingularVectorMerging"],
     "isotropic_merging": [
         "ISO_C_Merge",  # alias
@@ -128,7 +128,7 @@ if TYPE_CHECKING:
     from .dare import DareSimpleAverage, DareTaskArithmetic, DareTiesMerging
     from .dawe import DataAdaptiveWeightEnsemblingForCLIP
     from .depth_upscaling import DepthUpscalingAlgorithm, DepthUpscalingForLlama
-    from .DOGE_TA import DOGE_TA_Algorithm
+    from .doge_ta import DOGE_TA_Algorithm
     from .dummy import DummyAlgorithm
     from .ensemble import (
         MaxModelPredictorAlgorithm,

fusion_bench/method/adamerging/clip_layer_wise_adamerging.py CHANGED Viewed

@@ -9,7 +9,7 @@ fusion_bench \
     modelpool=clip-vit-base-patch32_TA8 \
     taskpool=clip-vit-classification_TA8 \
     fabric.loggers.root_dir=outputs/logs/ViT-B-32 \
-    fabric.loggers.name=clip_layer_wise_adamerging_adam
+    fabric.loggers.name=clip_layer_wise_adamerging_adamerging
 ```
 """

fusion_bench/method/doge_ta/__init__.py ADDED Viewed

@@ -0,0 +1,2 @@
+# flake8: noqa F401
+from .doge_ta import DOGE_TA_Algorithm

fusion_bench/method/{DOGE_TA → doge_ta}/clip_layer_wise_adamerging.py RENAMED Viewed

@@ -9,7 +9,7 @@ fusion_bench \
     modelpool=clip-vit-base-patch32_TA8 \
     taskpool=clip-vit-classification_TA8 \
     fabric.loggers.root_dir=outputs/logs/ViT-B-32 \
-    fabric.loggers.name=clip_layer_wise_adamerging_adam
+    fabric.loggers.name=clip_layer_wise_adamerging_adamerging
 ```
 """

fusion_bench/method/{DOGE_TA/DOGE_TA.py → doge_ta/doge_ta.py} RENAMED Viewed

@@ -7,7 +7,7 @@ Example Usage:
 ```bash
 fusion_bench \
-    method=DOGE_TA/DOGE_TA \
+    method=doge_ta/doge_ta \
     modelpool=CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only \
     taskpool=CLIPVisionModelTaskPool/clip-vit-classification_TA8

fusion_bench/method/opcm/opcm.py CHANGED Viewed

@@ -15,7 +15,7 @@ from tqdm.auto import tqdm
 from transformers import CLIPVisionModel
 from fusion_bench import BaseAlgorithm, BaseModelPool
-from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.mixins import LightningFabricMixin, SimpleProfilerMixin
 from fusion_bench.taskpool import CLIPVisionModelTaskPool
 from fusion_bench.utils import instantiate
 from fusion_bench.utils.json import load_from_json, save_to_json
@@ -31,6 +31,7 @@ if TYPE_CHECKING:
 class OPCMForCLIP(
     BaseAlgorithm,
     LightningFabricMixin,
+    SimpleProfilerMixin,
 ):
     def __init__(
         self,
@@ -64,7 +65,8 @@ class OPCMForCLIP(
             L.seed_everything(self.seed)
         accelerator = self.fabric.device
-        pretrained_model = modelpool.load_pretrained_model()
+        with self.profile("loading model"):
+            pretrained_model = modelpool.load_pretrained_model()
         model_names = modelpool.model_names
         if self.shuffle_order:
@@ -83,15 +85,17 @@ class OPCMForCLIP(
             )
         # get the average model
-        merged_model = modelpool.load_model(model_names[0])
+        with self.profile("loading model"):
+            merged_model = modelpool.load_model(model_names[0])
         if self.evaluate_on_every_step:
-            self.taskpool._is_setup = False
-            self.taskpool._test_datasets = DictConfig(
-                {model_names[0]: self._test_datasets[model_names[0]]}
-            )
-            report = self.taskpool.evaluate(deepcopy(merged_model))
-            save_to_json(report, Path(self.log_dir) / "report_0.json")
+            with self.profile("evaluating model"):
+                self.taskpool._is_setup = False
+                self.taskpool._test_datasets = DictConfig(
+                    {model_names[0]: self._test_datasets[model_names[0]]}
+                )
+                report = self.taskpool.evaluate(deepcopy(merged_model))
+                save_to_json(report, Path(self.log_dir) / "report_0.json")
         self.avg_task_vector_norm = get_task_vector_norm(merged_model, pretrained_model)
         self.all_task_vector_norm = [self.avg_task_vector_norm]
@@ -113,90 +117,95 @@ class OPCMForCLIP(
             enumerate(model_names[1:]), desc="Processing models"
         ):
             model_idx += 1
-            task_model = modelpool.load_model(model_name)
+            with self.profile("loading model"):
+                task_model = modelpool.load_model(model_name)
-            self.all_task_vector_norm.append(
-                get_task_vector_norm(task_model, pretrained_model)
-            )
-            self.avg_task_vector_norm = np.mean(self.all_task_vector_norm)
-            self.fabric.log(
-                "model/task_vector_norm", self.all_task_vector_norm[-1], step=model_idx
-            )
-            self.fabric.log(
-                "model/avg_task_vector_norm", self.avg_task_vector_norm, step=model_idx
-            )
+            with self.profile("merging model"):
+                self.all_task_vector_norm.append(
+                    get_task_vector_norm(task_model, pretrained_model)
+                )
+                self.avg_task_vector_norm = np.mean(self.all_task_vector_norm)
+                self.fabric.log(
+                    "model/task_vector_norm", self.all_task_vector_norm[-1], step=model_idx
+                )
+                self.fabric.log(
+                    "model/avg_task_vector_norm", self.avg_task_vector_norm, step=model_idx
+                )
-            self.lambda_t = 1  # temporary value
-            for module_name, module in tqdm(
-                list(merged_model.named_modules()),
-                desc=f"Processing {model_name}",
-                leave=False,
-            ):
-                if not is_leaf_module(module):
-                    continue
-                if isinstance(module, nn.Linear):
-                    module.weight.data = self.merge_linear_weights(
-                        module.weight,
-                        pretrained_model.get_submodule(module_name).weight,
-                        task_model.get_submodule(module_name).weight,
-                        param_name=".".join([module_name, "weight"]),
-                        alpha=self.alpha,
-                        accelerator=accelerator,
-                    )
-                    if module.bias is not None:
-                        module.bias.data = self.merge_other_parameters(
-                            module.bias,
-                            pretrained_model.get_submodule(module_name).bias,
-                            task_model.get_submodule(module_name).bias,
-                            param_name=".".join([module_name, "bias"]),
+                self.lambda_t = 1  # temporary value
+                for module_name, module in tqdm(
+                    list(merged_model.named_modules()),
+                    desc=f"Processing {model_name}",
+                    leave=False,
+                ):
+                    if not is_leaf_module(module):
+                        continue
+                    if isinstance(module, nn.Linear):
+                        module.weight.data = self.merge_linear_weights(
+                            module.weight,
+                            pretrained_model.get_submodule(module_name).weight,
+                            task_model.get_submodule(module_name).weight,
+                            param_name=".".join([module_name, "weight"]),
+                            alpha=self.alpha,
                             accelerator=accelerator,
                         )
-                else:
-                    for param_name, param in module.named_parameters():
-                        param.data = self.merge_other_parameters(
-                            merged_W=param,
-                            pretrained_W=pretrained_model.get_submodule(
-                                module_name
-                            ).get_parameter(param_name),
-                            task_W=task_model.get_submodule(module_name).get_parameter(
-                                param_name
-                            ),
-                            param_name=".".join([module_name, param_name]),
-                            accelerator=accelerator,
-                        )
-            task_vector_norm = get_task_vector_norm(merged_model, pretrained_model)
-            self.lambda_t *= task_vector_norm / self.avg_task_vector_norm
-            for param_name, param in merged_model.named_parameters():
-                param.data = pretrained_model.get_parameter(param_name) + (
-                    param - pretrained_model.get_parameter(param_name)
-                ) * (self.avg_task_vector_norm / task_vector_norm)
-            self.fabric.log("model/lambda_t", self.lambda_t, step=model_idx)
-            self.fabric.log(
-                "empirical/lambda_t", np.sqrt(model_idx + 1), step=model_idx
-            )
-            self.previous_lambda_t = self.lambda_t
-            self.lambda_t = None
+                        if module.bias is not None:
+                            module.bias.data = self.merge_other_parameters(
+                                module.bias,
+                                pretrained_model.get_submodule(module_name).bias,
+                                task_model.get_submodule(module_name).bias,
+                                param_name=".".join([module_name, "bias"]),
+                                accelerator=accelerator,
+                            )
+                    else:
+                        for param_name, param in module.named_parameters():
+                            param.data = self.merge_other_parameters(
+                                merged_W=param,
+                                pretrained_W=pretrained_model.get_submodule(
+                                    module_name
+                                ).get_parameter(param_name),
+                                task_W=task_model.get_submodule(module_name).get_parameter(
+                                    param_name
+                                ),
+                                param_name=".".join([module_name, param_name]),
+                                accelerator=accelerator,
+                            )
+                task_vector_norm = get_task_vector_norm(merged_model, pretrained_model)
+                self.lambda_t *= task_vector_norm / self.avg_task_vector_norm
+                for param_name, param in merged_model.named_parameters():
+                    param.data = pretrained_model.get_parameter(param_name) + (
+                        param - pretrained_model.get_parameter(param_name)
+                    ) * (self.avg_task_vector_norm / task_vector_norm)
+                self.fabric.log("model/lambda_t", self.lambda_t, step=model_idx)
+                self.fabric.log(
+                    "empirical/lambda_t", np.sqrt(model_idx + 1), step=model_idx
+                )
+                self.previous_lambda_t = self.lambda_t
+                self.lambda_t = None
-            self.fabric.log(
-                "model/merged_task_vector_norm",
-                get_task_vector_norm(merged_model, pretrained_model),
-                step=model_idx,
-            )
+                self.fabric.log(
+                    "model/merged_task_vector_norm",
+                    get_task_vector_norm(merged_model, pretrained_model),
+                    step=model_idx,
+                )
             if self.save_on_every_step:
-                self.save_merged_model(merged_model, model_idx)
+                with self.profile("saving model"):
+                    self.save_merged_model(merged_model, model_idx)
             if self.evaluate_on_every_step:
-                self.taskpool._is_setup = False
-                self.taskpool._test_datasets = DictConfig(
-                    {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
-                )
-                report = self.taskpool.evaluate(deepcopy(merged_model))
-                save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")
+                with self.profile("evaluating model"):
+                    self.taskpool._is_setup = False
+                    self.taskpool._test_datasets = DictConfig(
+                        {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
+                    )
+                    report = self.taskpool.evaluate(deepcopy(merged_model))
+                    save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")
+        self.print_profile_summary()
         return merged_model
     def save_merged_model(self, merged_model: CLIPVisionModel, step: int):
@@ -227,7 +236,7 @@ class OPCMForCLIP(
         split_rank = (s.cumsum(dim=0) / s.sum() > alpha).float().argmax().item()
         projected_task_tv = u.T @ task_tv @ v
-        projected_task_tv.diag().fill_(0)
+        projected_task_tv.diagonal().fill_(0)
         projected_task_tv[:split_rank, :split_rank] = 0

fusion_bench/method/opcm/task_arithmetic.py CHANGED Viewed

@@ -15,7 +15,7 @@ from tqdm.auto import tqdm
 from transformers import CLIPVisionModel
 from fusion_bench import BaseAlgorithm, BaseModelPool
-from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.mixins import LightningFabricMixin, SimpleProfilerMixin
 from fusion_bench.taskpool import CLIPVisionModelTaskPool
 from fusion_bench.utils.json import load_from_json, save_to_json
 from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_sub
@@ -24,7 +24,11 @@ if TYPE_CHECKING:
     from torch.utils.tensorboard import SummaryWriter
-class ContinualTaskArithmeticForCLIP(BaseAlgorithm, LightningFabricMixin):
+class ContinualTaskArithmeticForCLIP(
+    BaseAlgorithm,
+    LightningFabricMixin,
+    SimpleProfilerMixin,
+):
     def __init__(
         self,
         scaling_factor: float,
@@ -79,32 +83,42 @@ class ContinualTaskArithmeticForCLIP(BaseAlgorithm, LightningFabricMixin):
         for model_idx, model_name in tqdm(
             enumerate(model_names), desc="Processing models"
         ):
-            task_model = modelpool.load_model(model_name)
+            with self.profile("loading model"):
+                task_model = modelpool.load_model(model_name)
-            for param_name, param in task_model.named_parameters():
-                if not param.requires_grad:
-                    continue
+            with self.profile("merging model"):
+                for param_name, param in task_model.named_parameters():
+                    if not param.requires_grad:
+                        continue
-                task_param = param
-                merged_param = merged_model.get_parameter(param_name)
-                pretrained_param = pretrained_model.get_parameter(param_name)
+                    task_param = param
+                    merged_param = merged_model.get_parameter(param_name)
+                    pretrained_param = pretrained_model.get_parameter(param_name)
-                new_param = merged_param + self.scaling_factor * (
-                    task_param - pretrained_param
-                )
-                merged_model.get_parameter(param_name).data = new_param
+                    new_param = merged_param + self.scaling_factor * (
+                        task_param - pretrained_param
+                    )
+                    merged_model.get_parameter(param_name).data = new_param
             if self.save_on_every_step:
-                self.save_merged_model(merged_model, model_idx)
+                with self.profile("saving model"):
+                    self.save_merged_model(merged_model, model_idx)
             if self.evaluate_on_every_step:
-                self.taskpool._is_setup = False
-                self.taskpool._test_datasets = DictConfig(
-                    {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
-                )
-                report = self.taskpool.evaluate(deepcopy(merged_model))
-                save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")
+                with self.profile("evaluating model"):
+                    self.taskpool._is_setup = False
+                    self.taskpool._test_datasets = DictConfig(
+                        {
+                            n: self._test_datasets[n]
+                            for n in model_names[: model_idx + 1]
+                        }
+                    )
+                    report = self.taskpool.evaluate(deepcopy(merged_model))
+                    save_to_json(
+                        report, Path(self.log_dir) / f"report_{model_idx}.json"
+                    )
+        self.print_profile_summary()
         return merged_model
     def save_merged_model(self, merged_model: CLIPVisionModel, step: int):

fusion_bench/method/opcm/ties_merging.py CHANGED Viewed

@@ -20,7 +20,7 @@ from fusion_bench.method.ties_merging.ties_merging_utils import (
     ties_merging,
     vector_to_state_dict,
 )
-from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.mixins import LightningFabricMixin, SimpleProfilerMixin
 from fusion_bench.taskpool import CLIPVisionModelTaskPool
 from fusion_bench.utils.json import load_from_json, save_to_json
 from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_sub
@@ -29,7 +29,11 @@ if TYPE_CHECKING:
     from torch.utils.tensorboard import SummaryWriter
-class ContinualTiesMergingForCLIP(BaseAlgorithm, LightningFabricMixin):
+class ContinualTiesMergingForCLIP(
+    BaseAlgorithm,
+    LightningFabricMixin,
+    SimpleProfilerMixin,
+):
     def __init__(
         self,
         scaling_factor: float,
@@ -84,68 +88,83 @@ class ContinualTiesMergingForCLIP(BaseAlgorithm, LightningFabricMixin):
             )
         # get the average model
-        pretrained_model = modelpool.load_pretrained_model()
+        with self.profile("loading model"):
+            pretrained_model = modelpool.load_pretrained_model()
         merged_model = deepcopy(pretrained_model)
         for model_idx, model_name in tqdm(
             enumerate(model_names), desc="Processing models"
         ):
-            task_model = modelpool.load_model(model_name)
+            with self.profile("loading model"):
+                task_model = modelpool.load_model(model_name)
-            task_vector = state_dict_sub(
-                task_model.state_dict(),
-                pretrained_model.state_dict(),
-            )
-            if model_idx == 0:
-                # if is the first model, the merged task vector is equal to the task vector
-                ties_merging_state_dict = task_vector
-            else:
-                # if is not the first model, we need to merge the task vector with the previous merged task vector
-                merged_tv = state_dict_sub(
-                    merged_model.state_dict(),
+            with self.profile("merging model"):
+                task_vector = state_dict_sub(
+                    task_model.state_dict(),
                     pretrained_model.state_dict(),
                 )
-                tv_flat_checks = torch.vstack(
-                    [
-                        state_dict_to_vector(merged_tv, remove_keys=self.remove_keys),
-                        state_dict_to_vector(task_vector, remove_keys=self.remove_keys),
-                    ]
-                )
-                # perform the TIES merging
-                ties_merging_tv = ties_merging(
-                    tv_flat_checks,
-                    reset_thresh=self.threshold,
-                    merge_func=self.merge_func,
-                )
-                # convert the merged task vector back to a state dict
-                ties_merging_state_dict = vector_to_state_dict(
-                    ties_merging_tv,
-                    merged_model.state_dict(),
-                    remove_keys=self.remove_keys,
-                )
-            for param_name, param in task_model.named_parameters():
-                if not param.requires_grad:
-                    continue
-                merged_param = merged_model.get_parameter(param_name)
-                new_param = (
-                    merged_param
-                    + self.scaling_factor * ties_merging_state_dict[param_name]
-                )
-                merged_model.get_parameter(param_name).data = new_param
+                if model_idx == 0:
+                    # if is the first model, the merged task vector is equal to the task vector
+                    ties_merging_state_dict = task_vector
+                else:
+                    # if is not the first model, we need to merge the task vector with the previous merged task vector
+                    merged_tv = state_dict_sub(
+                        merged_model.state_dict(),
+                        pretrained_model.state_dict(),
+                    )
+                    tv_flat_checks = torch.vstack(
+                        [
+                            state_dict_to_vector(
+                                merged_tv, remove_keys=self.remove_keys
+                            ),
+                            state_dict_to_vector(
+                                task_vector, remove_keys=self.remove_keys
+                            ),
+                        ]
+                    )
+                    # perform the TIES merging
+                    ties_merging_tv = ties_merging(
+                        tv_flat_checks,
+                        reset_thresh=self.threshold,
+                        merge_func=self.merge_func,
+                    )
+                    # convert the merged task vector back to a state dict
+                    ties_merging_state_dict = vector_to_state_dict(
+                        ties_merging_tv,
+                        merged_model.state_dict(),
+                        remove_keys=self.remove_keys,
+                    )
+                for param_name, param in task_model.named_parameters():
+                    if not param.requires_grad:
+                        continue
+                    merged_param = merged_model.get_parameter(param_name)
+                    new_param = (
+                        merged_param
+                        + self.scaling_factor * ties_merging_state_dict[param_name]
+                    )
+                    merged_model.get_parameter(param_name).data = new_param
             if self.save_on_every_step:
-                self.save_merged_model(merged_model, model_idx)
+                with self.profile("saving model"):
+                    self.save_merged_model(merged_model, model_idx)
             if self.evaluate_on_every_step:
-                self.taskpool._is_setup = False
-                self.taskpool._test_datasets = DictConfig(
-                    {n: self._test_datasets[n] for n in model_names[: model_idx + 1]}
-                )
-                report = self.taskpool.evaluate(deepcopy(merged_model))
-                save_to_json(report, Path(self.log_dir) / f"report_{model_idx}.json")
+                with self.profile("evaluating model"):
+                    self.taskpool._is_setup = False
+                    self.taskpool._test_datasets = DictConfig(
+                        {
+                            n: self._test_datasets[n]
+                            for n in model_names[: model_idx + 1]
+                        }
+                    )
+                    report = self.taskpool.evaluate(deepcopy(merged_model))
+                    save_to_json(
+                        report, Path(self.log_dir) / f"report_{model_idx}.json"
+                    )
+        self.print_profile_summary()
         return merged_model
     def save_merged_model(self, merged_model: CLIPVisionModel, step: int):

fusion_bench/models/wrappers/layer_wise_fusion.py CHANGED Viewed

@@ -16,6 +16,7 @@ import torch
 from torch import Tensor, nn
 from torch.func import functional_call
+from fusion_bench.models.utils import del_attr, get_attr, set_attr
 from fusion_bench.utils.type import StateDictType, TorchModelType
 __all__ = ["get_layer_wise_weights", "fuse_weights", "LayerWiseMergedModel"]
@@ -23,52 +24,6 @@ __all__ = ["get_layer_wise_weights", "fuse_weights", "LayerWiseMergedModel"]
 log = logging.getLogger(__name__)
-def del_attr(obj, names: List[str]):
-    """
-    Deletes an attribute from an object recursively.
-    Args:
-        obj (object): Object to delete attribute from.
-        names (list): List of attribute names to delete recursively.
-    """
-    if len(names) == 1:
-        delattr(obj, names[0])
-    else:
-        del_attr(getattr(obj, names[0]), names[1:])
-def set_attr(obj, names: List[str], val):
-    """
-    Sets an attribute of an object recursively.
-    Args:
-        obj (object): Object to set attribute of.
-        names (list): List of attribute names to set recursively.
-        val (object): Value to set the attribute to.
-    """
-    if len(names) == 1:
-        setattr(obj, names[0], val)
-    else:
-        set_attr(getattr(obj, names[0]), names[1:], val)
-def get_attr(obj, names: List[str]):
-    """
-    Gets an attribute of an object recursively.
-    Args:
-        obj (object): Object to get attribute of.
-        names (list): List of attribute names to get recursively.
-    Returns:
-        object: The attribute of the object.
-    """
-    if len(names) == 1:
-        return getattr(obj, names[0])
-    else:
-        return get_attr(getattr(obj, names[0]), names[1:])
 def get_layer_wise_weights(
     num_models: int,
     num_layers: int,

fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py CHANGED Viewed

@@ -10,132 +10,17 @@ import torch
 from torch import Tensor, nn
 from torch.func import functional_call
+from fusion_bench.models.utils import del_attr, get_attr, set_attr
 from fusion_bench.utils.state_dict_arithmetic import state_dict_add
 from fusion_bench.utils.type import StateDictType
+from .layer_wise_fusion import fuse_weights, get_layer_wise_weights
 __all__ = ["get_layer_wise_weights", "fuse_weights", "LayerWiseMergedModel"]
 log = logging.getLogger(__name__)
-def del_attr(obj, names: List[str]):
-    """
-    Deletes an attribute from an object recursively.
-    Args:
-        obj (object): Object to delete attribute from.
-        names (list): List of attribute names to delete recursively.
-    """
-    if len(names) == 1:
-        delattr(obj, names[0])
-    else:
-        del_attr(getattr(obj, names[0]), names[1:])
-def set_attr(obj, names: List[str], val):
-    """
-    Sets an attribute of an object recursively.
-    Args:
-        obj (object): Object to set attribute of.
-        names (list): List of attribute names to set recursively.
-        val (object): Value to set the attribute to.
-    """
-    if len(names) == 1:
-        setattr(obj, names[0], val)
-    else:
-        set_attr(getattr(obj, names[0]), names[1:], val)
-def get_attr(obj, names: List[str]):
-    """
-    Gets an attribute of an object recursively.
-    Args:
-        obj (object): Object to get attribute of.
-        names (list): List of attribute names to get recursively.
-    Returns:
-        object: The attribute of the object.
-    """
-    if len(names) == 1:
-        return getattr(obj, names[0])
-    else:
-        return get_attr(getattr(obj, names[0]), names[1:])
-def get_layer_wise_weights(
-    num_models: int,
-    num_layers: int,
-    init_values: float = None,
-    dtype: torch.dtype = torch.float32,
-):
-    """
-    Return a tensor of layer-wise weights for the given number of models and layers.
-    Args:
-        num_models (int): The number of models to fuse.
-        num_layers (int): The number of layers in each model.
-        init_values (float, optional): The initial value for each weight. Defaults to 1.0 / num_models.
-        dtype (torch.dtype): dtype of weights. This should be the same with model dtype.
-    Returns:
-        Tensor: A tensor of shape (num_models, num_layers) containing the layer-wise weights.
-    """
-    assert num_models >= 1, f"num_models must be >= 1, got {num_models}"
-    assert num_layers >= 1, f"num_layers must be >= 1, got {num_layers}"
-    if init_values is None:
-        init_values = 1.0 / num_models
-    return torch.full((num_models, num_layers), init_values, dtype=dtype)
-def _fuse_weights(layer_wise_weight: Tensor, tensors: List[Tensor]):
-    """
-    Fuse the layer-wise weights with the given state dictionaries.
-    Args:
-        layer_wise_weight (Tensor): A tensor of shape (num_models,) containing the layer-wise weights.
-        state_dicts (List[Tensor]): A list of state dictionaries, each containing the weights for a single layer.
-    Returns:
-        Tensor: A tensor of shape (num_params,) containing the fused weights.
-    """
-    assert len(layer_wise_weight) == len(
-        tensors
-    ), f"layer_wise_weight.shape={layer_wise_weight.shape}, len(tensors)={len(tensors)}"
-    return sum(
-        layer_wise_weight[i] * w.to(layer_wise_weight.device)
-        for i, w in enumerate(tensors)
-    )
-def fuse_weights(
-    layer_wise_weight: Tensor, state_dicts: List[StateDictType]
-) -> StateDictType:
-    """
-    Fuse the weights of multiple models using layer-wise fusion.
-    Args:
-        layer_wise_weight (Tensor): A tensor of shape (num_models, num_layers) representing the weight of each layer for each model.
-        state_dicts (List[StateDict]): A list of state dictionaries, one for each model.
-    Returns:
-        A dictionary mapping each weight tensor key to the fused weight tensor.
-    """
-    num_models = len(state_dicts)
-    num_layers = len(state_dicts[0])
-    assert layer_wise_weight.shape == (
-        num_models,
-        num_layers,
-    ), f"layer_wise_weight.shape={layer_wise_weight.shape}, expected (num_models, num_layers): ({num_models}, {num_layers})"
-    return {
-        k: _fuse_weights(
-            layer_wise_weight[:, i], [state_dict[k] for state_dict in state_dicts]
-        )
-        for i, k in enumerate(state_dicts[0].keys())
-    }
 class LayerWiseMergedModel(nn.Module):
     _merged_state_dict: StateDictType = None
@@ -390,7 +275,7 @@ class LayerWiseMergedModel(nn.Module):
         layer_vectors_scale = layer_vectors * layer_lamdas.view(-1, 1, 1)
         sum_over_num_vectors = layer_vectors_scale.sum(dim=0)
-        layer_delta_scale = layer_delta.unsqueeze(0) * layer_lamdas.view(-1, 1, 1)
+        layer_delta_scale = layer_delta * layer_lamdas.view(-1, 1, 1)
         sum_over_delta = layer_delta_scale.sum(dim=0)
         # Iterate through each vector and calculate the loss one by one

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.12.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.11
+Version: 0.2.12
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -45,6 +45,7 @@ Requires-Dist: rich
 Requires-Dist: scipy
 Requires-Dist: h5py
 Requires-Dist: pytest
+Dynamic: license-file
 <div align='center'>
@@ -69,6 +70,18 @@ FusionBench is a benchmark suite designed to evaluate the performance of various
 Projects based on FusionBench and news from the community (descending order of date):
+<details>
+  <summary>Hao Mark Chen, et al. FW-Merging: Scaling Model Merging with Frank-Wolfe Optimization. Mar 2025. https://arxiv.org/abs/2503.12649</summary>
+Model merging has emerged as a promising approach for multi-task learning (MTL), offering a data-efficient alternative to conventional fine-tuning. However, with the rapid development of the open-source AI ecosystem and the increasing availability of fine-tuned foundation models, existing model merging methods face two key limitations: (i) They are primarily designed for in-house fine-tuned models, making them less adaptable to diverse model sources with partially unknown model and task information, (ii) They struggle to scale effectively when merging numerous model checkpoints. To address these challenges, we formulate model merging as a constrained optimization problem and introduce a novel approach: Frank-Wolfe Merging (FW-Merging). Inspired by Frank-Wolfe optimization, our approach iteratively selects the most relevant model in the pool to minimize a linear approximation of the objective function and then executes a local merging similar to the Frank-Wolfe update. The objective function is designed to capture the desired behavior of the target-merged model, while the fine-tuned candidate models define the constraint set. More importantly, FW-Merging serves as an orthogonal technique for existing merging methods, seamlessly integrating with them to further enhance accuracy performance. Our experiments show that FW-Merging scales across diverse model sources, remaining stable with 16 irrelevant models and improving by 15.3% with 16 relevant models on 20 CV tasks, while maintaining constant memory overhead, unlike the linear overhead of data-informed merging methods. Compared with the state-of-the-art approaches, FW-Merging surpasses the data-free merging method by 32.8% and outperforms the data-informed Adamerging by 8.39% when merging 20 ViT models.
+</details>
+<details>
+  <summary>Daniel Marczak, et al. No Task Left Behind: Isotropic Model Merging with Common and Task-Specific Subspaces. Feb 2025. https://arxiv.org/abs/2502.04959</summary>
+  Model merging integrates the weights of multiple task-specific models into a single multi-task model. Despite recent interest in the problem, a significant performance gap between the combined and single-task models remains. In this paper, we investigate the key characteristics of task matrices -- weight update matrices applied to a pre-trained model -- that enable effective merging. We show that alignment between singular components of task-specific and merged matrices strongly correlates with performance improvement over the pre-trained model. Based on this, we propose an isotropic merging framework that flattens the singular value spectrum of task matrices, enhances alignment, and reduces the performance gap. Additionally, we incorporate both common and task-specific subspaces to further improve alignment and performance. Our proposed approach achieves state-of-the-art performance across multiple scenarios, including various sets of tasks and model scales. This work advances the understanding of model merging dynamics, offering an effective methodology to merge models without requiring additional training.
+</details>
 <details>
   <summary>Anke Tang, et al. Merging Models on the Fly Without Retraining: A Sequential Approach to Scalable Continual Model Merging. Jan 2025. https://arxiv.org/pdf/2501.09522</summary>

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.12.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 fusion_bench/__init__.py,sha256=68dF-zPvb8E2MgYnmgIJsxIHJBy1MApKeOrRZvQEVlg,421
 fusion_bench/__main__.py,sha256=weUjxpP3ULnDgUxCehdbmoCM9cqfkhDhGB85tAF5qoE,81
 fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/compat/method/__init__.py,sha256=97izLAf4JssNAoOXR4MYffFxb3OEwpHeQeSlL_ihMKI,5566
+fusion_bench/compat/method/__init__.py,sha256=qbm_0o4Y-X2FY3skmsQpYnKQ3qnR24Z0-uLOEnzO59M,5566
 fusion_bench/compat/method/base_algorithm.py,sha256=63_AQDj1eJOO6RyTSGXVC6G2DsG8yg9E4pT3RJXgP3A,1952
 fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py,sha256=m68BRGy4P-P9lLB10oXOBI-p58a-0FOPcrJ4r4MU32k,1100
 fusion_bench/compat/modelpool/__init__.py,sha256=KD8Ddr9D7rJ5YdHEQsTuNmQ0bgQfqF4l3WNMtHmRHD8,4687
@@ -15,7 +15,7 @@ fusion_bench/constants/__init__.py,sha256=Pyc4dLbl6oNduOCdnpeXQ9LDyVoIrkdl9eZ_l2
 fusion_bench/constants/paths.py,sha256=DVZyQ9FLhkyUdw6ARpXUCAMf_B8hFyJ6UNI-oYly3pE,591
 fusion_bench/dataset/__init__.py,sha256=OJiYmcqz0Vm5O7mE4PB5QFJeL_KjrsseQTRsQATGTm4,1050
 fusion_bench/dataset/clip_dataset.py,sha256=XLpCOiXlLEP3DffAlBn4P2PpUenbEFl-Yk9MNy6nbbI,2790
-fusion_bench/dataset/fer2013.py,sha256=Lub_xVhHfqaiPprvOsDVspJNioh1FjSrkhn3gL_UXDA,404
+fusion_bench/dataset/fer2013.py,sha256=bAdujQSj1PcUVFlKJgqcHAuE9AWz7JE1fzZ6scFVvmc,403
 fusion_bench/dataset/gpt2_glue.py,sha256=Qq1ZkEIQsTjj8tImvkZDNlduocSYwlEfVrDReZqDWdw,8761
 fusion_bench/dataset/gsm8k.py,sha256=CmANZ0A89PfPwVu_myKhXk1D9IwypOpjH3iqDo1KxcQ,2233
 fusion_bench/dataset/image_dataset.py,sha256=MSZE_UESyRRQDwnkm2KpyIARUg9SWcwqnH4fDNstzS4,1870
@@ -41,20 +41,16 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
 fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
 fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
 fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/method/__init__.py,sha256=QGJzdOpZxonu_WUNXSFQIiMy4OHsgqmcU5Bs6OB_RT0,7040
+fusion_bench/method/__init__.py,sha256=7S1ODkq2Zppx59o80qcIwDlRtfOC2EU58ooGFlDdJIU,7040
 fusion_bench/method/base_algorithm.py,sha256=5dutGZfPqNhO8F8FOlo3UFR91TZu2Xj7O0pTB40JvWo,1135
 fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
 fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
 fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
 fusion_bench/method/simple_average.py,sha256=2ghcL1E-eLbIYDCHYCoR9WtiYSb1GvFAH163OTTTEEI,4481
-fusion_bench/method/DOGE_TA/DOGE_TA.py,sha256=veNjBfq65fB7oqQL66zAuA339WCY5mG-mefkVteg2-k,13785
-fusion_bench/method/DOGE_TA/__init__.py,sha256=OTukCLUlbCUTDqGBtgBZop7eYFDfU2wjG4PkP4fXN4Q,59
-fusion_bench/method/DOGE_TA/clip_layer_wise_adamerging.py,sha256=YdQ4trHohW6QzWC2enYvXA44WHxvzmoH_6sMrPn6z60,1305
-fusion_bench/method/DOGE_TA/layer_wise_adamerging.py,sha256=rLk3Nep5d6wMUNCp6q7pC7L0pfBvUwGBIuiGM7CQOf4,9780
 fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
 fusion_bench/method/ada_svd/clip_vision.py,sha256=QrT6cSwgVEGxXEpVhkvKQVQaoRW5P9V52Y3_8NX0f-o,12556
 fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
-fusion_bench/method/adamerging/clip_layer_wise_adamerging.py,sha256=YdQ4trHohW6QzWC2enYvXA44WHxvzmoH_6sMrPn6z60,1305
+fusion_bench/method/adamerging/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
 fusion_bench/method/adamerging/clip_task_wise_adamerging.py,sha256=Tys9pDJzz5YNUCO43pO44fGAnizfSaeAwgH4-vVxRN4,6948
 fusion_bench/method/adamerging/entropy_loss.py,sha256=ZeVe0Hq1PaMfppLqDbB0MOscZUZRNh4CALrvt8pmQC0,736
 fusion_bench/method/adamerging/flan_t5_layer_wise_adamerging.py,sha256=osc6ueCgiS4u8KUV_sZkHGFBYC8dThnTSp4NB0wkQIg,12915
@@ -87,6 +83,10 @@ fusion_bench/method/dawe/warppers/dawe_model.py,sha256=Z1L91vu3UzEHWrHs9i9UbwZpn
 fusion_bench/method/depth_upscaling/__init__.py,sha256=heVUh4tTzK427A10RFknf9eHwoZ1cpn1_0xyNXRU7YM,135
 fusion_bench/method/depth_upscaling/depth_upscaling.py,sha256=pf08zEae-WaWM4oUwn6_Dm65K59wf9AbTQ5iZU0ydsc,3256
 fusion_bench/method/depth_upscaling/depth_upscaling_for_llama.py,sha256=bSMhnrG-JtR0JBnOFy7aWAhD6A-YBB84qm_YnWjc7pA,2180
+fusion_bench/method/doge_ta/__init__.py,sha256=dixO0i5fmhgC_W2_DAQ4PzYnkMCZX5D8tDz84soqQ-Q,59
+fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py,sha256=UUSldRPBxHVOfkMM7ZwqZay5Wjc6XQ3Vy9PgyqV_TZo,1311
+fusion_bench/method/doge_ta/doge_ta.py,sha256=ec0qIq3F72nhbCVlfqdk1PYFM7QIlfMofeVFVvmDKiE,13785
+fusion_bench/method/doge_ta/layer_wise_adamerging.py,sha256=rLk3Nep5d6wMUNCp6q7pC7L0pfBvUwGBIuiGM7CQOf4,9780
 fusion_bench/method/fisher_merging/__init__.py,sha256=KWsjrtxKkPYwcUA5rB_6UNIqvesqk2NJw5AY_1ztLVE,225
 fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWETPZsCChqLRAhvfJp4QKD9TGSpTyV0,7635
 fusion_bench/method/fisher_merging/fisher_merging.py,sha256=CPU-tJiDv9FCIBYl7Pn0zA5cdRB1Md5kWchRDlJgly0,20456
@@ -109,9 +109,9 @@ fusion_bench/method/mixture_of_experts/__init__.py,sha256=r95iu1-3tgIUP7sWuAbLuq
 fusion_bench/method/mixture_of_experts/mixtral_merging.py,sha256=-n1CLP1o08VyMSfaTq42kRutbw-cFDSCWHTu0iNh6ok,4237
 fusion_bench/method/mixture_of_experts/mixtral_upcycling.py,sha256=tQYAeS8MLFEfH3zDFfNZrML7lRnpGLN-HquQvjPtHNw,11208
 fusion_bench/method/opcm/__init__.py,sha256=0QcltOnjIYV1XEPDEagChLixLAhjiBnYwfWK00am29k,202
-fusion_bench/method/opcm/opcm.py,sha256=USPPMFFVQ9UbcGvvK1573tgkO1kgcrhA5jzKdbNTy9g,10693
-fusion_bench/method/opcm/task_arithmetic.py,sha256=SNuuSyzHqvOT_e3i0z0MHNWaMP6xnDdkI9c2t1OcxO4,4328
-fusion_bench/method/opcm/ties_merging.py,sha256=38ogIysnRfePhB9SAfr1BPwtHyM8gEdhU2td_yTiB2g,6080
+fusion_bench/method/opcm/opcm.py,sha256=-sqfK5q_-yr_3YWigmXKVYRP1J7swHOR9eGMMzu1Dgw,11445
+fusion_bench/method/opcm/task_arithmetic.py,sha256=YvtsWkjtnk7E3C4_xNr--uQWjQhoDZZB-klSx81_tGw,4824
+fusion_bench/method/opcm/ties_merging.py,sha256=-N3i7eMbhK95qyJsmmNMKNmPCkgGHGFa423a52cgi6g,6868
 fusion_bench/method/opcm/utils.py,sha256=_q7yy3ENNFUh1qUd5J5DThRL4J1tIxEcknCO2AKmeYM,2102
 fusion_bench/method/opcm/weight_average.py,sha256=JfQoIU5J1jvrNKpO9k_t4Zj0y8PtteIfyoSQWx1yg2k,4379
 fusion_bench/method/pruning/__init__.py,sha256=3gtmay2bkdIAEGjpAhbY2ztMZOZLKhiJcKV3mCe2H5w,252
@@ -259,8 +259,8 @@ fusion_bench/models/surgery/__init__.py,sha256=tcUSi2m9GzGWfvRDQScIbdEbFBS_35gm9
 fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNkEiPwNrN11ydyQQ1EZHehM,5133
 fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/models/wrappers/ensemble.py,sha256=wIMZMRyXw5boWAm96c4Tiyebs_HDQovKxpGQ8rLnHUQ,6308
-fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=ZizBGQtSLKOzMLFAhrMNMcv6ZNdvABTyO7M1-DGHh3c,12316
-fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py,sha256=k335dxzq3ezuYkDVOv4ePi128NVyiHVCW6zyuDRTg30,20689
+fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=KamNaq4DlyxQrOp1i9aQLgA2WX81YD5NhzAQ5GF6rg0,11188
+fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py,sha256=q5Hc4BtLpAawMbxsWJRL-8OR-x7994Jhr9IyN7vKZ9o,16930
 fusion_bench/models/wrappers/task_wise_fusion.py,sha256=Wn3buQvWw_lihWaKB03_iz34cBPzwBD94kBT6uafWVQ,8404
 fusion_bench/optim/__init__.py,sha256=lemrcuiA6OLjQkpYm-RP-Ox2MgjngN1ywvCo0NgShlM,61
 fusion_bench/optim/exception.py,sha256=fMgo1heiqfGhuI5RIbf30BwWSShn5RQiyeb30QtfTI0,1607
@@ -359,6 +359,7 @@ fusion_bench/utils/plot/token_notebook.py,sha256=bsntXf46Zz_RavTxNiB9c3-KvHw7LFw
 fusion_bench/utils/strenum/__init__.py,sha256=id9ORi1uXrDxhbmVxitJ1KDwLS4H3AAwFpaK5h1cQzw,8531
 fusion_bench/utils/strenum/_name_mangler.py,sha256=o11M5-bURW2RBvRTYXFQIPNeqLzburdoWLIqk8X3ydw,3397
 fusion_bench/utils/strenum/_version.py,sha256=6JQRo9LcvODbCOeVFYQb9HNJ_J9XiG_Zbn8ws2A3BV8,18466
+fusion_bench-0.2.12.dist-info/licenses/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
 fusion_bench_config/README.md,sha256=Lc8YSBJ5oxf9KV5kKDivJ9LRyGuraGQPmBbgbdVA-j4,703
 fusion_bench_config/clip-vit-base-patch32_robustness_corrupted.yaml,sha256=GtK3VuD2FOpFHH_1Hi6tlaYpdLE5Cz0nYKP92Ss9G2Y,1164
 fusion_bench_config/fabric_model_fusion.yaml,sha256=1shmbuC0B9snkFkLErBCiroF-z7UnEHscyEmKBne7Oo,949
@@ -472,7 +473,6 @@ fusion_bench_config/method/pwe_moe_ls_for_clip.yaml,sha256=brs9zYeuXfFnnCoRrSaAY
 fusion_bench_config/method/simple_average.yaml,sha256=GtMNvt0-qWOevRX2V6fjiYUO2BwDvMw-EcxRMS_PhZQ,53
 fusion_bench_config/method/task_arithmetic.yaml,sha256=TbpAeTwIX48PFOkZU-Ihuu6U9Y5XHZJGDu7vHLt5FjU,74
 fusion_bench_config/method/ties_merging.yaml,sha256=N-XyOTEW0JRtyRJizpHqtb1GEIogUU22XSG76QvIvnw,292
-fusion_bench_config/method/DOGE_TA/DOGE_TA.yaml,sha256=6R9NRuWmj0oapJ_raMB6R6rZPMckt2JtMLrTQ6HhrFc,77
 fusion_bench_config/method/ada_svd/clip_vision.yaml,sha256=KDpDpzuNVqqyyqJcL0q-Ml2A7IUqn_-2dOZXs8zHKlU,184
 fusion_bench_config/method/adamerging/clip.yaml,sha256=fBG7jBBepygKpCbM3fmUeVAr2zzx0g8C21rGGfnEPkA,730
 fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml,sha256=7FPPMf6lcOD2dlNUbb5JyF3pqJ3D2jmvbWAbW9WGn0Y,546
@@ -493,6 +493,7 @@ fusion_bench_config/method/dare/simple_average.yaml,sha256=oTFSCHul86NTjTtJYK5pN
 fusion_bench_config/method/dare/task_arithmetic.yaml,sha256=Cvsam89yquamn_GkITT6q8qFKN_Yb5nv8p-XgvnVrgU,134
 fusion_bench_config/method/dare/ties_merging.yaml,sha256=50mPiRkzLN7gxaIs56sPWkAUSvqvdxjQJ8eVl1yUGOg,418
 fusion_bench_config/method/dawe/dawe_for_clip.yaml,sha256=8-Z_kwwGCy1AO4brW-R_pe8oJ0yqoD4WCLI9ZtJ4KOo,1026
+fusion_bench_config/method/doge_ta/doge_ta.yaml,sha256=6R9NRuWmj0oapJ_raMB6R6rZPMckt2JtMLrTQ6HhrFc,77
 fusion_bench_config/method/ensemble/max_model_predictor.yaml,sha256=fsWuNJwr1ohVB2aJ5L2fsiDLztm5GieE9JS99w--two,56
 fusion_bench_config/method/ensemble/simple_ensemble.yaml,sha256=bw9FabjhQYNbttsiMgTVd-Z4KIowf050Uy97vKtm2ys,55
 fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=U_wQXtogtgiqOTszHUgcGNfrKlXD6JrR_HjqNwAkkKo,262
@@ -691,7 +692,8 @@ fusion_bench_config/modelpool/CausalLMPool/single_llama_model.yaml,sha256=aX0rWw
 fusion_bench_config/modelpool/Seq2SeqLMPool/_template.yaml,sha256=mRx-Xx4s6_IBoJJRogIBW4egmqW0wi1kGVWp_YwYVvQ,233
 fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue.yaml,sha256=6Rgfq3cjCRWbAL8Bb-Dkvl9eJP4FKmqewBpokajwYWU,335
 fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16.yaml,sha256=1vaVb059Wh3XMD8MhXD9p5a0zx8mi9HovOcS0k51uK8,1699
-fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml,sha256=GfTY343bt5YtxtUkQxSacrtQav9lT9Y-t1VIL1Chs4k,1726
+fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml,sha256=dwBb3wPfyxH6cx6txBd31OOlrfCvPkM-nIN46FJer-I,1790
+fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml,sha256=2BBuK1uyKL_9uo3X3bScjZiK-PtIiE_7RHj4onK_3R0,1725
 fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_individual.yaml,sha256=2YBIzqYGluOT2r6dOFpUYE4Cbdd2XoHAUps-kCDxVPQ,185
 fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-large_glue_lora16.yaml,sha256=W1y3fKY9UTTRyv7nqbIO5DESlQVfNsWlhkHJMUYh7B4,1824
 fusion_bench_config/modelpool/SeqenceClassificationModelPool/llama_preference700k.yaml,sha256=JUzGOLANW92Y_rljOOZKmwBQvWrJsko_ziayurzHSTY,880
@@ -736,9 +738,8 @@ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml,sha256=2AqMiNCRRunLIrssHvFzu1lUzOaQn8uOHM9yjrQq-_A,109
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=iQMj2VpDTe_D8OfCo94w5Ud2MON-EGa0DzVr6UmphrA,436
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=i5Bn8bLl2cgqvrgtIGmoovUfSMehk_m-6C2wwcx5JMU,435
-fusion_bench-0.2.11.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
-fusion_bench-0.2.11.dist-info/METADATA,sha256=AYdGcKXZ6BeHCv1piGgpK1yktQqVga-PjUDxS4RYwog,16780
-fusion_bench-0.2.11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
-fusion_bench-0.2.11.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.11.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.11.dist-info/RECORD,,
+fusion_bench-0.2.12.dist-info/METADATA,sha256=V0KZSil6pMjhZVA3x0wUrW-eskY5DsyclRkiuh8sfec,20085
+fusion_bench-0.2.12.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+fusion_bench-0.2.12.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.12.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.12.dist-info/RECORD,,

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.12.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.8.2)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_lora16_tta.yaml CHANGED Viewed

@@ -1,14 +1,14 @@
 defaults:
   - /model/flan-t5@models:
       - flan-t5-base
-      - flan-t5-base_glue-cola
-      - flan-t5-base_glue-mnli
-      - flan-t5-base_glue-mrpc
-      - flan-t5-base_glue-qnli
-      - flan-t5-base_glue-qqp
-      - flan-t5-base_glue-rte
-      - flan-t5-base_glue-sst2
-      - flan-t5-base_glue-stsb
+      - flan-t5-base_glue-cola_lora-16
+      - flan-t5-base_glue-mnli_lora-16
+      - flan-t5-base_glue-mrpc_lora-16
+      - flan-t5-base_glue-qnli_lora-16
+      - flan-t5-base_glue-qqp_lora-16
+      - flan-t5-base_glue-rte_lora-16
+      - flan-t5-base_glue-sst2_lora-16
+      - flan-t5-base_glue-stsb_lora-16
 _target_: fusion_bench.modelpool.Seq2SeqLMPool
 _recursive_: false

fusion_bench_config/modelpool/Seq2SeqLMPool/flan-t5-base_glue_tta.yaml ADDED Viewed

@@ -0,0 +1,68 @@
+defaults:
+  - /model/flan-t5@models:
+      - flan-t5-base
+      - flan-t5-base_glue-cola
+      - flan-t5-base_glue-mnli
+      - flan-t5-base_glue-mrpc
+      - flan-t5-base_glue-qnli
+      - flan-t5-base_glue-qqp
+      - flan-t5-base_glue-rte
+      - flan-t5-base_glue-sst2
+      - flan-t5-base_glue-stsb
+_target_: fusion_bench.modelpool.Seq2SeqLMPool
+_recursive_: false
+_dataset_loader: fusion_bench.tasks.flan_t5_text_generation.glue_load_dataset.load_glue_dataset
+test_datasets:
+  glue-cola:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: cola
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-mnli:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: mnli
+    tokenizer: ${...tokenizer}
+    split: validation_matched
+  glue-mrpc:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: mrpc
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-qnli:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: qnli
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-qqp:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: qqp
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-rte:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: rte
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-sst2:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: sst2
+    tokenizer: ${...tokenizer}
+    split: validation
+  glue-stsb:
+    _target_: ${..._dataset_loader}
+    _recursive_: false
+    name: stsb
+    tokenizer: ${...tokenizer}
+    split: validation
+tokenizer:
+  _target_: transformers.AutoTokenizer.from_pretrained
+  pretrained_model_name_or_path: google/flan-t5-base

fusion_bench/method/DOGE_TA/__init__.py DELETED Viewed

@@ -1,2 +0,0 @@
-# flake8: noqa F401
-from .DOGE_TA import DOGE_TA_Algorithm

/fusion_bench/method/{DOGE_TA → doge_ta}/layer_wise_adamerging.py RENAMED Viewed

File without changes

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.12.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.12.dist-info/licenses}/LICENSE RENAMED Viewed

File without changes

{fusion_bench-0.2.11.dist-info → fusion_bench-0.2.12.dist-info}/top_level.txt RENAMED Viewed

File without changes

/fusion_bench_config/method/{DOGE_TA/DOGE_TA.yaml → doge_ta/doge_ta.yaml} RENAMED Viewed

File without changes

fusion-bench 0.2.11__py3-none-any.whl → 0.2.12__py3-none-any.whl

fusion-bench 0.2.11__py3-none-any.whl → 0.2.12__py3-none-any.whl