PyPI - fusion-bench - Versions diffs - 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl - Mend

fusion-bench 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py ADDED Viewed

@@ -0,0 +1,531 @@
+import copy
+import functools
+import logging
+from collections import OrderedDict
+from copy import deepcopy
+from typing import Any, Callable, Dict, Iterator, List, Optional  # noqa: F401
+import lightning as L
+import torch
+from torch import Tensor, nn
+from torch.func import functional_call
+from fusion_bench.utils.state_dict_arithmetic import state_dict_add
+from fusion_bench.utils.type import StateDictType
+# Names exported by a star import of this module.
+__all__ = ["get_layer_wise_weights", "fuse_weights", "LayerWiseMergedModel"]
+# Module-level logger, named after this module.
+log = logging.getLogger(__name__)
+def del_attr(obj, names: List[str]):
+    """
+    Remove a nested attribute addressed by a path of attribute names.
+    Args:
+        obj (object): Root object the path starts from.
+        names (list): Attribute names. Every name but the last is followed with `getattr`; the last one is deleted from the object reached that way.
+    """
+    owner = obj
+    # Walk down to the object that directly holds the attribute to delete.
+    for attr_name in names[:-1]:
+        owner = getattr(owner, attr_name)
+    delattr(owner, names[-1])
+def set_attr(obj, names: List[str], val):
+    """
+    Assign a nested attribute addressed by a path of attribute names.
+    Args:
+        obj (object): Root object the path starts from.
+        names (list): Attribute names. Every name but the last is followed with `getattr`; the last one is set on the object reached that way.
+        val (object): Value to store.
+    """
+    owner = obj
+    # Walk down to the object that directly holds the attribute to set.
+    for attr_name in names[:-1]:
+        owner = getattr(owner, attr_name)
+    setattr(owner, names[-1], val)
+def get_attr(obj, names: List[str]):
+    """
+    Read a nested attribute addressed by a path of attribute names.
+    Args:
+        obj (object): Root object the path starts from.
+        names (list): Attribute names, followed one after another with `getattr`.
+    Returns:
+        object: The attribute found at the end of the path.
+    """
+    owner = obj
+    # Walk down to the object that directly holds the requested attribute.
+    for attr_name in names[:-1]:
+        owner = getattr(owner, attr_name)
+    return getattr(owner, names[-1])
+def get_layer_wise_weights(
+    num_models: int,
+    num_layers: int,
+    init_values: float = None,
+    dtype: torch.dtype = torch.float32,
+):
+    """
+    Build a constant-filled tensor of layer-wise merging weights.
+    Args:
+        num_models (int): The number of models to fuse. Must be >= 1.
+        num_layers (int): The number of layers in each model. Must be >= 1.
+        init_values (float, optional): Value every entry is filled with. When None, 1.0 / num_models is used.
+        dtype (torch.dtype): dtype of the returned tensor.
+    Returns:
+        Tensor: A tensor of shape (num_models, num_layers) filled with the initial value.
+    """
+    assert num_models >= 1, f"num_models must be >= 1, got {num_models}"
+    assert num_layers >= 1, f"num_layers must be >= 1, got {num_layers}"
+    fill_value = 1.0 / num_models if init_values is None else init_values
+    weight_shape = (num_models, num_layers)
+    return torch.full(weight_shape, fill_value, dtype=dtype)
+def _fuse_weights(layer_wise_weight: Tensor, tensors: List[Tensor]):
+    """
+    Compute the weighted sum of one layer's tensors across models.
+    Args:
+        layer_wise_weight (Tensor): A tensor of shape (num_models,) holding one merging weight per model.
+        tensors (List[Tensor]): One tensor per model for the layer being fused; each is moved to the device of `layer_wise_weight` before scaling.
+    Returns:
+        Tensor: The sum over models of `layer_wise_weight[i] * tensors[i]`.
+    """
+    assert len(layer_wise_weight) == len(
+        tensors
+    ), f"layer_wise_weight.shape={layer_wise_weight.shape}, len(tensors)={len(tensors)}"
+    target_device = layer_wise_weight.device
+    # Start from the integer 0, exactly like the built-in `sum` does.
+    fused = 0
+    for model_idx, tensor in enumerate(tensors):
+        fused = fused + layer_wise_weight[model_idx] * tensor.to(target_device)
+    return fused
+def fuse_weights(
+    layer_wise_weight: Tensor, state_dicts: List[StateDictType]
+) -> StateDictType:
+    """
+    Fuse several state dictionaries with one merging weight per (model, entry).
+    Args:
+        layer_wise_weight (Tensor): A tensor of shape (num_models, num_layers), where num_layers is the number of entries in the first state dictionary.
+        state_dicts (List[StateDict]): One state dictionary per model. Keys are taken from the first one and looked up in all the others.
+    Returns:
+        A dictionary mapping each key to the weighted sum of that entry across models.
+    """
+    num_models = len(state_dicts)
+    reference_sd = state_dicts[0]
+    num_layers = len(reference_sd)
+    assert layer_wise_weight.shape == (
+        num_models,
+        num_layers,
+    ), f"layer_wise_weight.shape={layer_wise_weight.shape}, expected (num_models, num_layers): ({num_models}, {num_layers})"
+    fused_sd = {}
+    # Column `layer_idx` of the weight matrix holds the per-model weights for `key`.
+    for layer_idx, key in enumerate(reference_sd.keys()):
+        per_model_tensors = [sd[key] for sd in state_dicts]
+        fused_sd[key] = _fuse_weights(layer_wise_weight[:, layer_idx], per_model_tensors)
+    return fused_sd
+class LayerWiseMergedModel(nn.Module):
+    # Result of the last `merge_weights` call; None until the first merge.
+    _merged_state_dict: StateDictType = None
+    # Lightning Fabric handle. It is only created in `__init__` when CUDA is
+    # available; this class-level default keeps `self._fabric is not None`
+    # checks from raising AttributeError on CPU-only machines.
+    _fabric = None
+    def __init__(
+        self,
+        layer_wise_weight: Tensor,
+        pretrained_model: nn.Module,
+        finetuned_models: List[nn.Module],
+        clamp_weights: bool = True,
+        tie_weights: bool = False,
+        strict: bool = True,
+        sparsity_ratio: Optional[float] = None,
+        normalized_merging_weights: bool = False,
+    ):
+        R"""
+        This class wraps a pretrained model and a list of finetuned models, and merges the weights of the finetuned models into the pretrained model using layer-wise fusion.
+        Construction is expensive: it computes one SVD per (layer, model), runs 400 Adam steps per layer to fit a correction `delta` for every task vector, and masks the corrected task vectors by magnitude.
+        Args:
+            layer_wise_weight (Tensor): A tensor of shape (num_models, num_layers) representing the weight of each layer for each model. Only the first `len(filtered_keys)` columns are kept.
+            pretrained_model (nn.Module): The pretrained model to merge the weights into.
+            finetuned_models (List[nn.Module]): A list of finetuned models to merge the weights from. This should have the same architecture as the pretrained model. We use these models to compute the task vectors.
+            clamp_weights (bool, optional): If True, the layer-wise weights will be clamped to [0, 1]. Defaults to True.
+            tie_weights (bool, optional): This option passes the `tie_weights` argument to the `functional_call` function. Defaults to False.
+            strict (bool, optional): This option passes the `strict` argument to the `functional_call` function. Defaults to True.
+            sparsity_ratio (float, optional): If `sparsity_ratio` is provided, the task vector will be pruned before merging. A high sparsity level can save the memory usage during merging.
+            normalized_merging_weights (bool, optional): If True, the layer-wise weights will be normalized for each layer, so that the sum of weights across models for each layer is 1. Defaults to False.
+        """
+        super().__init__()
+        if torch.cuda.is_available():
+            self._fabric = L.Fabric(devices=1)
+            self._fabric.launch()
+        self.clamp_weights = clamp_weights
+        self.tie_weights = tie_weights
+        self.strict = strict
+        self.sparsity_ratio = sparsity_ratio
+        # NOTE: the attribute name is misspelled (sic); it is kept as-is because
+        # code outside this class may already read it.
+        self.nromalized_merging_weights = normalized_merging_weights
+        pretrained_sd = pretrained_model.state_dict(keep_vars=True)
+        # Only entries whose key contains "encoder" and "weight" but not
+        # "layer_norm" take part in the merge.
+        filtered_keys = [
+            k
+            for k in pretrained_sd.keys()
+            if ("encoder" in k and "layer_norm" not in k and "weight" in k)
+        ]
+        self.merge_weight = nn.Parameter(
+            layer_wise_weight[:, : len(filtered_keys)], requires_grad=True
+        )
+        for m in finetuned_models:
+            m.requires_grad_(False)
+        self.pretrained_model = pretrained_model.requires_grad_(False)
+        # Task vector = finetuned weights - pretrained weights, per filtered key.
+        task_vectors = []
+        for model in finetuned_models:
+            model_sd = model.state_dict(keep_vars=True)
+            filtered_task_vector = {
+                k: (model_sd[k] - pretrained_sd[k]) for k in filtered_keys
+            }
+            if self._fabric is not None:
+                filtered_task_vector = self._fabric.to_device(filtered_task_vector)
+            task_vectors.append(filtered_task_vector)
+        num_tasks = len(task_vectors)
+        # Build one projection matrix per layer from the leading left singular
+        # vectors of every task vector.
+        self.projection = {}
+        for layer_name in task_vectors[0].keys():
+            for i, vector in enumerate(task_vectors):
+                layer_vector = vector[layer_name]
+                u, s, _ = torch.linalg.svd(layer_vector, full_matrices=False)
+                if i == 0:
+                    print(f"Computed SVD for {layer_name}...")
+                    sum_u = torch.zeros_like(u, device=layer_vector.device)
+                # Each task contributes an equal share of the available rank.
+                reduced_index_s = int(s.shape[0] / num_tasks)
+                # select only the first reduced_index_s columns of u and place them
+                sum_u[:, i * reduced_index_s : (i + 1) * reduced_index_s] = u[
+                    :, :reduced_index_s
+                ]
+            u_u, _, _ = torch.linalg.svd(sum_u, full_matrices=False)
+            basis = u_u[:, :reduced_index_s]
+            self.projection[layer_name] = torch.matmul(basis, basis.T)
+        # One trainable, zero-initialised correction per task vector entry.
+        self.delta = [
+            {
+                k: torch.zeros_like(v).clone().requires_grad_()
+                for k, v in task_vector.items()
+            }
+            for task_vector in task_vectors
+        ]
+        if self._fabric is not None:
+            self.delta = self._fabric.to_device(self.delta)
+        self.lamdas = self.compute_layer_lamdas(task_vectors)
+        # Fit the corrections layer by layer.
+        for layer_name in task_vectors[0].keys():
+            optimizer = torch.optim.Adam(
+                [delta[layer_name] for delta in self.delta], lr=1e-4
+            )
+            layer_vectors = torch.stack([vec[layer_name] for vec in task_vectors])
+            layer_lamdas = torch.stack([lamdas[layer_name] for lamdas in self.lamdas])
+            for epoch in range(400):
+                optimizer.zero_grad()
+                layer_delta = torch.stack([de[layer_name] for de in self.delta])
+                loss = self.taskvector_loss(layer_vectors, layer_delta, layer_lamdas)
+                print(f"Epoch: {epoch}, Layer: {layer_name}, Loss: {loss.item()}")
+                # Without Fabric (no CUDA) fall back to the plain autograd call.
+                if self._fabric is not None:
+                    self._fabric.backward(loss)
+                else:
+                    loss.backward()
+                # Subtract from each gradient its image under the layer's
+                # projection matrix before the optimizer step.
+                for delta in self.delta:
+                    grad_proj = (
+                        self.projection[layer_name] @ delta[layer_name].grad.detach()
+                    )
+                    delta[layer_name].grad.data = delta[layer_name].grad.data.sub_(
+                        grad_proj
+                    )
+                optimizer.step()
+                for delta in self.delta:
+                    for param in delta.values():
+                        param.grad = None
+        del self.projection
+        # Everything below works on detached CPU copies.
+        self.delta = [
+            {key: param.detach().cpu() for key, param in delta.items()}
+            for delta in self.delta
+        ]
+        self.lamdas = [
+            {key: param.cpu() for key, param in lamdas.items()}
+            for lamdas in self.lamdas
+        ]
+        task_vectors = [
+            {k: v.cpu() for k, v in task_vector.items()} for task_vector in task_vectors
+        ]
+        # The magnitude mask is computed from the uncorrected task vector and then
+        # applied to (task vector + delta).
+        flat_vectors = []
+        vector_masks = []
+        for task_vector in task_vectors:
+            flat_vector = self.state_dict_to_vector(task_vector)
+            vector_mask = self.topk_values_mask(flat_vector, K=30)
+            flat_vectors.append(flat_vector)
+            vector_masks.append(vector_mask)
+        flat_deltas = [self.state_dict_to_vector(delta) for delta in self.delta]
+        self.task_vectors = [
+            self.vector_to_state_dict(
+                (flat_vector + flat_delta) * vector_mask, self.delta[0]
+            )
+            for flat_vector, flat_delta, vector_mask in zip(
+                flat_vectors, flat_deltas, vector_masks
+            )
+        ]
+        if self._fabric is not None:
+            self.task_vectors = self._fabric.to_device(self.task_vectors)
+        # if `sparsity_ratio` is given, prune the 2-D task vector entries.
+        if sparsity_ratio is not None:
+            from fusion_bench.method.pruning.prune_utils import (
+                unstructured_magnitude_prune_,
+            )
+            # `self.task_vectors` is a plain list of dicts (not an nn.Module), so
+            # the entries are visited and replaced through the dicts themselves.
+            # NOTE(review): pruned entries are stored as sparse tensors; confirm
+            # `merge_weights` handles sparse operands on the target torch version.
+            for task_vector in self.task_vectors:
+                for name in list(task_vector.keys()):
+                    param = task_vector[name]
+                    if param.dim() != 2:
+                        continue
+                    print(f"pruning {name}")
+                    pruned_param = unstructured_magnitude_prune_(
+                        param.data.clone(), torch.abs, sparsity_ratio=sparsity_ratio
+                    )
+                    task_vector[name] = pruned_param.to_sparse()
+    def topk_values_mask(self, M, K):
+        """
+        Build a boolean mask selecting the largest-magnitude entries of each row.
+        Args:
+            M (Tensor): 1-D or 2-D tensor. A 1-D input is treated as a single row.
+            K (float): Share of entries to keep. Values above 1 are read as a percentage and divided by 100.
+        Returns:
+            Tensor: Boolean mask that is True where `|M|` is at least the per-row threshold. All True when the threshold rank would fall below 1.
+        """
+        if K > 1:
+            K /= 100
+        original_shape = M.shape
+        if M.dim() == 1:
+            M = M.unsqueeze(0)
+        _, d = M.shape
+        # 1-based rank, in ascending magnitude, of the threshold element.
+        k = d - int(d * K)
+        if k <= 0:
+            # `kthvalue` requires k >= 1; a non-positive rank means every entry
+            # is to be kept.
+            mask = torch.ones_like(M, dtype=torch.bool)
+        else:
+            # Find the k-th smallest element by magnitude for each row
+            kth_values, _ = M.abs().kthvalue(k, dim=1, keepdim=True)
+            mask = M.abs() >= kth_values
+        final_mask = mask.squeeze() if original_shape == M.squeeze().shape else mask
+        return final_mask
+    def state_dict_to_vector(self, state_dict, remove_keys=None):
+        """
+        Convert a state dictionary to a vector, removing specified keys.
+        Entries are concatenated in sorted-key order.
+        Args:
+            state_dict (dict): The state dictionary to convert. It is deep-copied, not modified.
+            remove_keys (list, optional): Keys to drop before flattening. Defaults to no keys.
+        Returns:
+            Tensor: A vector representation of the state dictionary.
+        """
+        remove_keys = [] if remove_keys is None else remove_keys
+        shared_state_dict = copy.deepcopy(state_dict)
+        for key in remove_keys:
+            if key in shared_state_dict:
+                del shared_state_dict[key]
+        sorted_shared_state_dict = OrderedDict(sorted(shared_state_dict.items()))
+        return nn.utils.parameters_to_vector(
+            [value.reshape(-1) for value in sorted_shared_state_dict.values()]
+        )
+    def vector_to_state_dict(self, vector, state_dict, remove_keys=None):
+        """
+        Convert a vector back to a state dictionary, removing specified keys.
+        Args:
+            vector (Tensor): The vector to convert.
+            state_dict (dict): The reference state dictionary that defines keys and shapes. It is deep-copied, not modified.
+            remove_keys (list, optional): Keys to drop from the reference before filling it. Defaults to no keys.
+        Returns:
+            dict: An OrderedDict, sorted by key, filled from the vector.
+        """
+        remove_keys = [] if remove_keys is None else remove_keys
+        # create a reference dict to define the order of the vector
+        reference_dict = copy.deepcopy(state_dict)
+        for key in remove_keys:
+            if key in reference_dict:
+                del reference_dict[key]
+        sorted_reference_dict = OrderedDict(sorted(reference_dict.items()))
+        # create a shared state dict using the reference dict
+        nn.utils.vector_to_parameters(vector, sorted_reference_dict.values())
+        # add back the removed keys, all pointing at the shared embedding weight.
+        if "transformer.shared.weight" in sorted_reference_dict:
+            for key in remove_keys:
+                sorted_reference_dict[key] = sorted_reference_dict[
+                    "transformer.shared.weight"
+                ]
+        return sorted_reference_dict
+    def taskvector_loss(self, layer_vectors, layer_delta, layer_lamdas) -> torch.Tensor:
+        """
+        Computes the loss based on delta and task vectors for a specific layer.
+        Args:
+            layer_vectors (Tensor): Stacked task vectors of one layer, one per task along dim 0.
+            layer_delta (Tensor): Stacked corrections of the same layer, one per task along dim 0.
+            layer_lamdas (Tensor): One scaling coefficient per task.
+        Returns:
+            Tensor: Scalar loss accumulated over all task vectors.
+        """
+        total_loss = 0.0
+        layer_vectors_scale = layer_vectors * layer_lamdas.view(-1, 1, 1)
+        sum_over_num_vectors = layer_vectors_scale.sum(dim=0)
+        # NOTE(review): `unsqueeze(0)` adds a leading axis that the following
+        # `sum(dim=0)` removes again, so the task axis is NOT reduced here,
+        # unlike `sum_over_num_vectors` above. Confirm this is intended.
+        layer_delta_scale = layer_delta.unsqueeze(0) * layer_lamdas.view(-1, 1, 1)
+        sum_over_delta = layer_delta_scale.sum(dim=0)
+        # Iterate through each vector and calculate the loss one by one
+        for v_j in layer_vectors:
+            part1 = -v_j * sum_over_num_vectors
+            part2 = -v_j * sum_over_delta
+            part3 = v_j * v_j
+            expression = part1 + part2 + part3
+            layer_loss = expression.sum(dim=1).pow(2).sum()
+            # Cumulative total loss
+            total_loss += layer_loss
+        return total_loss
+    def compute_layer_lamdas(
+        self, vectors: List[StateDictType]
+    ) -> List[Dict[str, Tensor]]:
+        """
+        Compute one scaling coefficient per task and layer as 0.07 / norm of the layer's task vector.
+        Args:
+            vectors (List[StateDictType]): One task vector dictionary per task.
+        Returns:
+            List[Dict[str, Tensor]]: For every task, a mapping from layer name to its coefficient.
+        """
+        # NOTE(review): a task vector with zero norm yields a non-finite coefficient.
+        return [
+            {
+                layer_name: 0.07 / torch.norm(layer_vector)
+                for layer_name, layer_vector in vec.items()
+            }
+            for vec in vectors
+        ]
+    @property
+    def forward_model(self):
+        """Return `functional_call` bound to the pretrained model and the current merged state dict."""
+        return functools.partial(
+            functional_call,
+            self.pretrained_model,
+            self._merged_state_dict,
+            tie_weights=self.tie_weights,
+            strict=self.strict,
+        )
+    def merge_and_unload(self, task_vector_mask: Optional[Dict[str, Tensor]] = None):
+        """
+        Merge the weights, load them into the pretrained model and return that model.
+        Args:
+            task_vector_mask (Dict[str, Tensor], optional): Forwarded to `merge_weights`, which currently does not use it.
+        """
+        self.merge_weights(task_vector_mask=task_vector_mask)
+        self.pretrained_model.load_state_dict(self._merged_state_dict)
+        return self.pretrained_model
+    def merge_weights(self, task_vector_mask: Optional[Dict[str, Tensor]] = None):
+        """
+        Merges the weights of the model.
+        Call this after each update step.
+        Args:
+            task_vector_mask (Dict[str, Tensor], optional): Accepted for interface compatibility; not used by this implementation.
+        Returns:
+            The merged state dictionary, which is also cached on the instance.
+        """
+        if self.clamp_weights:
+            layer_wise_weight = self.merge_weight.clamp(0, 1)
+        else:
+            layer_wise_weight = self.merge_weight
+        if self.nromalized_merging_weights:
+            # normalize the weights for each layer, so that the sum of weights across models for each layer is 1.
+            layer_wise_weight = layer_wise_weight.softmax(dim=0)
+        state_dict = self.pretrained_model.state_dict(keep_vars=True)
+        # shape of layer_wise_weight: (num_models, num_layers)
+        for weight, task_vector in zip(layer_wise_weight, self.task_vectors):
+            # The j-th weight of a model is paired with the j-th entry of its
+            # task vector dictionary, in that dictionary's iteration order.
+            for w, (name, param) in zip(weight, task_vector.items()):
+                state_dict[name] = state_dict[name] + param * w
+        self._merged_state_dict = state_dict
+        return state_dict
+    def forward(self, *args, **kwargs):
+        """Run the pretrained model with the merged weights, merging first if no merge has happened yet."""
+        if self._merged_state_dict is None:
+            self.merge_weights()
+        return self.forward_model(args=args, kwargs=kwargs)
+def merge_weights(module: nn.Module):
+    """
+    Call `merge_weights()` on every outermost `LayerWiseMergedModel` found in the module tree.
+    The search does not descend into a `LayerWiseMergedModel` once one is found.
+    Args:
+        module (nn.Module): The module to process.
+    """
+    if not isinstance(module, LayerWiseMergedModel):
+        # Plain container: keep searching in its direct children.
+        for child in module.children():
+            merge_weights(child)
+        return
+    module.merge_weights()
+def merge_and_unload(module: nn.Module):
+    """
+    Replace every `LayerWiseMergedModel` in the module tree by its merged underlying model.
+    Args:
+        module (nn.Module): The module to process.
+    Returns:
+        nn.Module: The result of `module.merge_and_unload()` when `module` itself is a `LayerWiseMergedModel`; otherwise `module`, with wrapped children swapped in place.
+    """
+    if isinstance(module, LayerWiseMergedModel):
+        return module.merge_and_unload()
+    for child_name, child in module.named_children():
+        unloaded = merge_and_unload(child)
+        # Only wrappers are re-assigned; other children were updated in place.
+        if isinstance(child, LayerWiseMergedModel):
+            setattr(module, child_name, unloaded)
+    return module
+def fix_other_parts(module: nn.Module):
+    """
+    Freeze the whole module, then re-enable gradients for the `merge_weight`
+    of every `LayerWiseMergedModel` inside it.
+    Args:
+        module (nn.Module): The module to process.
+    Returns:
+        nn.Module: The same module, with updated `requires_grad` flags.
+    """
+    module.requires_grad_(False)
+    wrappers = (m for m in module.modules() if isinstance(m, LayerWiseMergedModel))
+    for wrapper in wrappers:
+        wrapper.merge_weight.requires_grad_(True)
+    return module

{fusion_bench-0.2.9.dist-info → fusion_bench-0.2.11.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: fusion_bench
-Version: 0.2.9
+Version: 0.2.11
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License

{fusion_bench-0.2.9.dist-info → fusion_bench-0.2.11.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
 fusion_bench/__init__.py,sha256=68dF-zPvb8E2MgYnmgIJsxIHJBy1MApKeOrRZvQEVlg,421
 fusion_bench/__main__.py,sha256=weUjxpP3ULnDgUxCehdbmoCM9cqfkhDhGB85tAF5qoE,81
 fusion_bench/compat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/compat/method/__init__.py,sha256=KUKHpX7AfvB7fmOAlruWp0r1z17xpkI9l29PMvLWR9A,4956
+fusion_bench/compat/method/__init__.py,sha256=97izLAf4JssNAoOXR4MYffFxb3OEwpHeQeSlL_ihMKI,5566
 fusion_bench/compat/method/base_algorithm.py,sha256=63_AQDj1eJOO6RyTSGXVC6G2DsG8yg9E4pT3RJXgP3A,1952
 fusion_bench/compat/modelpool/AutoModelForSeq2SeqLM.py,sha256=m68BRGy4P-P9lLB10oXOBI-p58a-0FOPcrJ4r4MU32k,1100
 fusion_bench/compat/modelpool/__init__.py,sha256=KD8Ddr9D7rJ5YdHEQsTuNmQ0bgQfqF4l3WNMtHmRHD8,4687
@@ -15,7 +15,7 @@ fusion_bench/constants/__init__.py,sha256=Pyc4dLbl6oNduOCdnpeXQ9LDyVoIrkdl9eZ_l2
 fusion_bench/constants/paths.py,sha256=DVZyQ9FLhkyUdw6ARpXUCAMf_B8hFyJ6UNI-oYly3pE,591
 fusion_bench/dataset/__init__.py,sha256=OJiYmcqz0Vm5O7mE4PB5QFJeL_KjrsseQTRsQATGTm4,1050
 fusion_bench/dataset/clip_dataset.py,sha256=XLpCOiXlLEP3DffAlBn4P2PpUenbEFl-Yk9MNy6nbbI,2790
-fusion_bench/dataset/fer2013.py,sha256=bAdujQSj1PcUVFlKJgqcHAuE9AWz7JE1fzZ6scFVvmc,403
+fusion_bench/dataset/fer2013.py,sha256=Lub_xVhHfqaiPprvOsDVspJNioh1FjSrkhn3gL_UXDA,404
 fusion_bench/dataset/gpt2_glue.py,sha256=Qq1ZkEIQsTjj8tImvkZDNlduocSYwlEfVrDReZqDWdw,8761
 fusion_bench/dataset/gsm8k.py,sha256=CmANZ0A89PfPwVu_myKhXk1D9IwypOpjH3iqDo1KxcQ,2233
 fusion_bench/dataset/image_dataset.py,sha256=MSZE_UESyRRQDwnkm2KpyIARUg9SWcwqnH4fDNstzS4,1870
@@ -41,12 +41,16 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
 fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
 fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
 fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/method/__init__.py,sha256=Hx_e9afWildQmZsHvInllReP1ACN2LfF8MjQqb3gkKc,6228
+fusion_bench/method/__init__.py,sha256=QGJzdOpZxonu_WUNXSFQIiMy4OHsgqmcU5Bs6OB_RT0,7040
 fusion_bench/method/base_algorithm.py,sha256=5dutGZfPqNhO8F8FOlo3UFR91TZu2Xj7O0pTB40JvWo,1135
 fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
 fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
 fusion_bench/method/model_recombination.py,sha256=2tviqmYSPOL0_Ktv8_gt_YzQ4tyCANHxXquUot_3Cgo,5360
 fusion_bench/method/simple_average.py,sha256=2ghcL1E-eLbIYDCHYCoR9WtiYSb1GvFAH163OTTTEEI,4481
+fusion_bench/method/DOGE_TA/DOGE_TA.py,sha256=veNjBfq65fB7oqQL66zAuA339WCY5mG-mefkVteg2-k,13785
+fusion_bench/method/DOGE_TA/__init__.py,sha256=OTukCLUlbCUTDqGBtgBZop7eYFDfU2wjG4PkP4fXN4Q,59
+fusion_bench/method/DOGE_TA/clip_layer_wise_adamerging.py,sha256=YdQ4trHohW6QzWC2enYvXA44WHxvzmoH_6sMrPn6z60,1305
+fusion_bench/method/DOGE_TA/layer_wise_adamerging.py,sha256=rLk3Nep5d6wMUNCp6q7pC7L0pfBvUwGBIuiGM7CQOf4,9780
 fusion_bench/method/ada_svd/__init__.py,sha256=4XzQbbvE9HI3NtEmEFvo8iC3ds_85vJXe7P7qJfL7kk,77
 fusion_bench/method/ada_svd/clip_vision.py,sha256=QrT6cSwgVEGxXEpVhkvKQVQaoRW5P9V52Y3_8NX0f-o,12556
 fusion_bench/method/adamerging/__init__.py,sha256=nt0saBT_3bqghk-pINQ-XCWm9UWwSZllu4R1sDuAJAA,376
@@ -65,10 +69,12 @@ fusion_bench/method/analysis/task_vector_cos_similarity.py,sha256=pL-XsWTo258yZT
 fusion_bench/method/analysis/task_vector_violin_plot.py,sha256=ie8hPl6QsVz9MQ6C2OEpzIBxQnmVKNf1FPc5bThmQGM,7606
 fusion_bench/method/classification/__init__.py,sha256=emB06UOMDHK5pfQ1WuvLG9Fm0aEEtZxSjpVw8fVE0fM,167
 fusion_bench/method/classification/clip_finetune.py,sha256=DlV1isp8vz6jwXNYQ6zbblAoUfnssL-WBpDeaXI5BVw,15727
-fusion_bench/method/classification/continual_clip_finetune.py,sha256=v_2tmkb92okNbKRiW7AYdGEA7y2wQUwEjD01aeI99v0,11530
-fusion_bench/method/concrete_subspace/__init__.py,sha256=yjadcpquHZbeZYsbfYhe2JlX46kObfiWJRsIoVcOEg4,223
+fusion_bench/method/classification/continual_clip_finetune.py,sha256=OLhZKS-6aCnafevZkZYcNMKTWDDj3DATB27eZl_i8EY,11530
+fusion_bench/method/concrete_subspace/__init__.py,sha256=jJoFcjnQe-jvccsm9DuCXna378m9XBT9vV1fEZbdfR0,464
 fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py,sha256=90_0HkOIl0XQG89xMa0UiBhrwfV2YqfLxlS04AouR3o,24755
 fusion_bench/method/concrete_subspace/clip_concrete_task_arithmetic.py,sha256=Nx-3AiAeIt5zmcC21Ta2_-4cAQg9hOWvThurXNZzA-w,10580
+fusion_bench/method/concrete_subspace/clip_post_defense.py,sha256=h-c0ioxDopg7pUoRjxx3epqQxVKZAZWz8s7yHjM88mg,32355
+fusion_bench/method/concrete_subspace/clip_safe_concrete_adamerging.py,sha256=eEKKUBgHufYTBaWWxkIKDF0lkuLI2bBgNHVr1JqT41c,35694
 fusion_bench/method/dare/__init__.py,sha256=63Xwkawyl_Ooy4xFxoDlP6wf-rgEWNqPuWTT9-6Ku5o,156
 fusion_bench/method/dare/simple_average.py,sha256=jR08PokPIr5PWSZbGVOp3IApgKvxAIovg3vnB2KiTwk,906
 fusion_bench/method/dare/task_arithmetic.py,sha256=Seno_2BhuogdRxXOni8alnHG-fdW15_OWoAvMoBoJj0,2780
@@ -85,6 +91,9 @@ fusion_bench/method/fisher_merging/__init__.py,sha256=KWsjrtxKkPYwcUA5rB_6UNIqve
 fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWETPZsCChqLRAhvfJp4QKD9TGSpTyV0,7635
 fusion_bench/method/fisher_merging/fisher_merging.py,sha256=CPU-tJiDv9FCIBYl7Pn0zA5cdRB1Md5kWchRDlJgly0,20456
 fusion_bench/method/fisher_merging/gpt2_fisher_merging.py,sha256=LZmz41jZ5dSsAHxfOUpr3u2rlCgUPTDR7xMsIlQM-jc,7576
+fusion_bench/method/isotropic_merging/__init__.py,sha256=0mxrl1UIjeFAPQcPcZtbgoCJO-DMW_49GKAhgcG-vEA,585
+fusion_bench/method/isotropic_merging/iso.py,sha256=MwKqfk0oyxqtdOzeSx_9jFXX1a4Rd0WcEPsYvQhBSCg,3773
+fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
 fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
 fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
 fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
@@ -151,7 +160,7 @@ fusion_bench/method/tall_mask/utils.py,sha256=Wlp8WcPwR_lCaBIZ9rgG6ewLfSzz3G7kPk
 fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
 fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=1D0uuNtqyA1VS35jh6AnEVsX72HnT02THyerck_lmso,5441
 fusion_bench/method/task_singular_vector/TSVC.py,sha256=yn4SrZNvtA6PoGYJmbmtNeDyDbGnRCgfZ7ZCg914AZU,410
-fusion_bench/method/task_singular_vector/TSVM.py,sha256=ANBGC1GM8c9oy_xlY-ZEyoWO9mnZh5aiF_rrvVH73l8,1925
+fusion_bench/method/task_singular_vector/TSVM.py,sha256=H5RzZlQQeF4kZFjuxkz8v3gyVKS3iKPgqNnitKQzbXk,2787
 fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
 fusion_bench/method/task_singular_vector/utils/TSVC_utils.py,sha256=FytKbal48EW6iGIA-2zV7QSVbYTVflXr4Mr56q0W75k,2286
 fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=dsTMQ15zFJ1MPqDOt2TJ01O9Bwq_klyG9xL9hRD2aI0,27521
@@ -251,6 +260,7 @@ fusion_bench/models/surgery/surgerymodelwrapper.py,sha256=F8jX88K5zVWC6HsfN-nGNk
 fusion_bench/models/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 fusion_bench/models/wrappers/ensemble.py,sha256=wIMZMRyXw5boWAm96c4Tiyebs_HDQovKxpGQ8rLnHUQ,6308
 fusion_bench/models/wrappers/layer_wise_fusion.py,sha256=ZizBGQtSLKOzMLFAhrMNMcv6ZNdvABTyO7M1-DGHh3c,12316
+fusion_bench/models/wrappers/layer_wise_fusion_doge_ta.py,sha256=k335dxzq3ezuYkDVOv4ePi128NVyiHVCW6zyuDRTg30,20689
 fusion_bench/models/wrappers/task_wise_fusion.py,sha256=Wn3buQvWw_lihWaKB03_iz34cBPzwBD94kBT6uafWVQ,8404
 fusion_bench/optim/__init__.py,sha256=lemrcuiA6OLjQkpYm-RP-Ox2MgjngN1ywvCo0NgShlM,61
 fusion_bench/optim/exception.py,sha256=fMgo1heiqfGhuI5RIbf30BwWSShn5RQiyeb30QtfTI0,1607
@@ -462,6 +472,7 @@ fusion_bench_config/method/pwe_moe_ls_for_clip.yaml,sha256=brs9zYeuXfFnnCoRrSaAY
 fusion_bench_config/method/simple_average.yaml,sha256=GtMNvt0-qWOevRX2V6fjiYUO2BwDvMw-EcxRMS_PhZQ,53
 fusion_bench_config/method/task_arithmetic.yaml,sha256=TbpAeTwIX48PFOkZU-Ihuu6U9Y5XHZJGDu7vHLt5FjU,74
 fusion_bench_config/method/ties_merging.yaml,sha256=N-XyOTEW0JRtyRJizpHqtb1GEIogUU22XSG76QvIvnw,292
+fusion_bench_config/method/DOGE_TA/DOGE_TA.yaml,sha256=6R9NRuWmj0oapJ_raMB6R6rZPMckt2JtMLrTQ6HhrFc,77
 fusion_bench_config/method/ada_svd/clip_vision.yaml,sha256=KDpDpzuNVqqyyqJcL0q-Ml2A7IUqn_-2dOZXs8zHKlU,184
 fusion_bench_config/method/adamerging/clip.yaml,sha256=fBG7jBBepygKpCbM3fmUeVAr2zzx0g8C21rGGfnEPkA,730
 fusion_bench_config/method/adamerging/layer_wise_flan_t5.yaml,sha256=7FPPMf6lcOD2dlNUbb5JyF3pqJ3D2jmvbWAbW9WGn0Y,546
@@ -474,6 +485,10 @@ fusion_bench_config/method/classification/clip_finetune.yaml,sha256=yWjcdKYaKvy5
 fusion_bench_config/method/concrete_subspace/clip_concrete_layer_wise_adamerging.yaml,sha256=XsHzr_5NoUZs0Us3eVwP3lUYXYvyJwGEEG9aDI_Z0rU,740
 fusion_bench_config/method/concrete_subspace/clip_concrete_task_arithmetic.yaml,sha256=eNoqcY1iMbs0Y5kKi_ya3rmQQMHqU7ht3EU7G_xmwN0,746
 fusion_bench_config/method/concrete_subspace/clip_concrete_task_wise_adamerging.yaml,sha256=WgTJj28FlVjR0_mCGJC5B8aJa9yezI3QusoXXHOrFoU,739
+fusion_bench_config/method/concrete_subspace/clip_post_defense_AWM.yaml,sha256=eGUCntXzDtW0tYX1vij7BHgDWzWq6sz2yFipVZj6z9E,849
+fusion_bench_config/method/concrete_subspace/clip_post_defense_SAU.yaml,sha256=DUYOU5A8MQw2cTqbraIDMFC7ciO8RXE2qXgVEEUudLM,891
+fusion_bench_config/method/concrete_subspace/clip_safe_concrete_layer_wise_adamerging.yaml,sha256=olDW_p5gyyaynwbGAQgm2ZicYAx9n3i4FprxPecuUsU,923
+fusion_bench_config/method/concrete_subspace/clip_safe_concrete_task_arithmetic.yaml,sha256=KLO3C1BdeB6FBKHT0xG4V0OFk7ib2SeMScKeaN5BlsU,863
 fusion_bench_config/method/dare/simple_average.yaml,sha256=oTFSCHul86NTjTtJYK5pNr3tuxW7XxNI-y6fL9Yo4VI,113
 fusion_bench_config/method/dare/task_arithmetic.yaml,sha256=Cvsam89yquamn_GkITT6q8qFKN_Yb5nv8p-XgvnVrgU,134
 fusion_bench_config/method/dare/ties_merging.yaml,sha256=50mPiRkzLN7gxaIs56sPWkAUSvqvdxjQJ8eVl1yUGOg,418
@@ -484,6 +499,8 @@ fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=U_wQXtogtgiqOT
 fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml,sha256=rl7kfVvdo2pG-DnglQUbjzkyBqnq1FpfoSDSjFtdLwk,633
 fusion_bench_config/method/fisher_merging/fisher_merging.yaml,sha256=B1wrv9mhaOID4KcAUEMZNxlvY3tR3Q3UGualFslvx-Y,475
 fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml,sha256=AE7XZqRDj4__J_ipEcjPs7qTB2J3xLQyFRlq1W4iHFE,563
+fusion_bench_config/method/isotropic_merging/iso_c.yaml,sha256=Lh_OtTaUJ08--h85fUr2asF85xLe1NMCK8fVAhHOzdQ,82
+fusion_bench_config/method/isotropic_merging/iso_cts.yaml,sha256=x5vZo__kO8njl4_gFdXnOt15X_qFLv6-diSWHOR4clw,111
 fusion_bench_config/method/linear/expo.yaml,sha256=St3NW6cKVRV3vCn8y0gxQ8k66VTdtsLTEWQTbO9wQ0Y,420
 fusion_bench_config/method/linear/linear_interpolation.yaml,sha256=IQgltk5REITSx8xLuLP11ByPbuMgy7dHz_BrxIgwOas,67
 fusion_bench_config/method/linear/llama_expo.yaml,sha256=SEsC-l5gugY0vlsQkTJqzVgWJnMjFzWuTz814UKbFeM,624
@@ -515,7 +532,7 @@ fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml,sha256
 fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml,sha256=w1OWb38nW08K_hvrRMsCwmRxHWLGQfSSXg5nTiYaP8E,635
 fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml,sha256=J6vYIwqzh95-B3ekDias3FnCrVr4sig4zxpWyvz8hZ0,613
 fusion_bench_config/method/surgery/adamerging_surgery.yaml,sha256=Ne9JlJFgsRYcygBNCOBSN1ygBcLkE6I-8yusfTxyg-Y,826
-fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml,sha256=Se2v7AwwGqulXEVktRRzznpba4nNrWegY2bOwvjrHG8,74
+fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml,sha256=CLONjN9TXQ0OQwZHaje0q3WJWxR3LD1b5q5KrWJfZIA,169
 fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=mK09Ohsvj0Q6suj5qJM4DyCzRy192QBt4wjHS6W29IY,197
 fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=jiAco7M1XO0aekHFZKLKlXL_jRoCA8bgGD44Z7iB208,1001
 fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
@@ -719,9 +736,9 @@ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml,sha256=2AqMiNCRRunLIrssHvFzu1lUzOaQn8uOHM9yjrQq-_A,109
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=iQMj2VpDTe_D8OfCo94w5Ud2MON-EGa0DzVr6UmphrA,436
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=i5Bn8bLl2cgqvrgtIGmoovUfSMehk_m-6C2wwcx5JMU,435
-fusion_bench-0.2.9.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
-fusion_bench-0.2.9.dist-info/METADATA,sha256=CvYwT5a-5o-Xbj9-HEgnl3gGAhNvss7Z1YggC8rYZcU,16779
-fusion_bench-0.2.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-fusion_bench-0.2.9.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.9.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.9.dist-info/RECORD,,
+fusion_bench-0.2.11.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
+fusion_bench-0.2.11.dist-info/METADATA,sha256=AYdGcKXZ6BeHCv1piGgpK1yktQqVga-PjUDxS4RYwog,16780
+fusion_bench-0.2.11.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+fusion_bench-0.2.11.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.11.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.11.dist-info/RECORD,,

fusion-bench 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl

fusion-bench 0.2.9__py3-none-any.whl → 0.2.11__py3-none-any.whl