fusion-bench 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@ def load_fer2013(path: str = "clip-benchmark/wds_fer2013", split: str = "train")
     dataset = dataset.rename_columns({"jpg": "image", "cls": "label"})
     return dataset

+
 if __name__ == "__main__":
     dataset = load_fer2013(split="test")
     print(dataset)
@@ -54,6 +54,12 @@ _import_structure = {
     ],
     "ada_svd": ["AdaSVDMergingForCLIPVisionModel"],
     "task_singular_vector": ["TaskSingularVectorMerging"],
+    "isotropic_merging": [
+        "ISO_C_Merge",  # alias
+        "ISO_CTS_Merge",  # alias
+        "IsotropicMergingInCommonAndTaskSubspace",
+        "IsotropicMergingInCommonSubspace",
+    ],
     "opcm": ["OPCMForCLIP"],
     # plug-and-play model merging methods
     "concrete_subspace": [
@@ -120,6 +126,12 @@ if TYPE_CHECKING:
         WeightedEnsembleAlgorithm,
     )
     from .fisher_merging import FisherMergingForCLIPVisionModel
+    from .isotropic_merging import (
+        ISO_C_Merge,
+        ISO_CTS_Merge,
+        IsotropicMergingInCommonAndTaskSubspace,
+        IsotropicMergingInCommonSubspace,
+    )
     from .linear import (
         ExPOAlgorithm,
         ExPOAlgorithmForLlama,
@@ -26,8 +26,8 @@ from fusion_bench.models.hf_clip import HFCLIPClassifier
 from fusion_bench.models.linearized.linearized_model_utils import LinearizedModelWraper
 from fusion_bench.taskpool import CLIPVisionModelTaskPool
 from fusion_bench.utils.data import InfiniteDataLoader
-from fusion_bench.utils.json import load_from_json, save_to_json
 from fusion_bench.utils.fabric import seed_everything_by_time
+from fusion_bench.utils.json import load_from_json, save_to_json


 class ContinualImageClassificationFineTuningForCLIP(
@@ -0,0 +1,15 @@
+"""
+This module contains the implementation of the Isotropic Merging in Common Subspace (ISO-C) algorithm and Isotropic Merging in Common and Task-Specific Subspaces (Iso-CTS) algorithm.
+Modified from the original implementation: https://github.com/danielm1405/iso-merging
+
+Reference:
+- Daniel Marczak, et al. No Task Left Behind: Isotropic Model Merging with Common and Task-Specific Subspaces. 2025.
+  https://arxiv.org/abs/2502.04959
+"""
+
+from .iso import (
+    ISO_C_Merge,
+    ISO_CTS_Merge,
+    IsotropicMergingInCommonSubspace,
+    IsotropicMergingInCommonAndTaskSubspace,
+)
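For orientation, the update implemented by this new module (see iso.py and iso_utils.py below) can be written compactly; this is an editorial paraphrase of the code, not text from the package. Writing the task vectors as the differences between each fine-tuned model and the pretrained model, every 2D weight is merged by flattening the singular-value spectrum of their sum:

    \Delta_t = \theta_t - \theta_{\text{pre}}, \qquad
    \Delta = \sum_{t=1}^{T} \Delta_t = U \operatorname{diag}(\sigma_1, \dots, \sigma_r)\, V^\top, \qquad
    \bar{\sigma} = \frac{1}{r} \sum_{i=1}^{r} \sigma_i,

    \Delta_{\text{iso}} = U \,(\bar{\sigma} I_r)\, V^\top, \qquad
    \theta_{\text{merged}} = \theta_{\text{pre}} + \lambda\, \Delta_{\text{iso}},

where lambda is the scaling_factor argument of both classes. Iso-CTS differs only in how U and V are assembled: a common_space_fraction share of the directions comes from the SVD of the summed task vectors and the remainder from per-task SVDs of the residuals, after which the same mean-spectrum flattening is applied.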
@@ -0,0 +1,114 @@
+from typing import List
+
+import torch
+
+from fusion_bench import BaseAlgorithm, BaseModelPool
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.utils.state_dict_arithmetic import (
+    state_dict_add,
+    state_dict_sub,
+    state_dict_mul,
+)
+
+from .iso_utils import iso_c, iso_cts, check_parameterNamesMatch
+
+
+class IsotropicMergingInCommonSubspace(BaseAlgorithm, LightningFabricMixin):
+    """
+    Isotropic Merging in Common Subspace (Iso-C)
+    """
+
+    def __init__(
+        self,
+        scaling_factor: float,
+        exclude_keys: List[str] = None,
+    ):
+        self.scaling_factor = scaling_factor
+        self.exclude_keys = exclude_keys
+        super().__init__()
+
+    def run(self, modelpool: BaseModelPool):
+        # load the pretrained model and the task vectors of all the finetuned models
+        with torch.no_grad():
+            pretrained_model = modelpool.load_pretrained_model()
+            task_vectors = []
+            for model_name in modelpool.model_names:
+                finetuned_model = modelpool.load_model(model_name)
+                task_vectors.append(
+                    state_dict_sub(
+                        finetuned_model.state_dict(), pretrained_model.state_dict()
+                    )
+                )
+                del finetuned_model  # free memory
+            check_parameterNamesMatch(task_vectors)
+
+        # compute the merged task vector
+        merged_tv = iso_c(
+            task_vectors,
+            accelerator=self.fabric.device,
+            exclude_keys=self.exclude_keys,
+        )
+
+        # merged_parameters = pretrained_parameters + scaling_factor * merged_task_vector
+        pretrained_model.load_state_dict(
+            state_dict_add(
+                pretrained_model.state_dict(),
+                state_dict_mul(merged_tv, self.scaling_factor),
+            )
+        )
+
+        return pretrained_model
+
+
+class IsotropicMergingInCommonAndTaskSubspace(BaseAlgorithm, LightningFabricMixin):
+    """
+    Isotropic Merging in Common and Task-Specific Subspaces (Iso-CTS)
+    """
+
+    def __init__(
+        self,
+        scaling_factor: float,
+        common_space_fraction: float,
+        exclude_keys: List[str] = None,
+    ):
+        self.common_space_fraction = common_space_fraction
+        self.scaling_factor = scaling_factor
+        self.exclude_keys = exclude_keys
+        super().__init__()
+
+    def run(self, modelpool: BaseModelPool):
+        # load the pretrained model and the task vectors of all the finetuned models
+        with torch.no_grad():
+            pretrained_model = modelpool.load_pretrained_model()
+            task_vectors = []
+            for model_name in modelpool.model_names:
+                finetuned_model = modelpool.load_model(model_name)
+                task_vectors.append(
+                    state_dict_sub(
+                        finetuned_model.state_dict(), pretrained_model.state_dict()
+                    )
+                )
+                del finetuned_model  # free memory
+            check_parameterNamesMatch(task_vectors)
+
+        # compute the merged task vector
+        merged_tv = iso_cts(
+            task_vectors,
+            common_space_fraction=self.common_space_fraction,
+            accelerator=self.fabric.device,
+            exclude_keys=self.exclude_keys,
+        )
+
+        # merged_parameters = pretrained_parameters + scaling_factor * merged_task_vector
+        pretrained_model.load_state_dict(
+            state_dict_add(
+                pretrained_model.state_dict(),
+                state_dict_mul(merged_tv, self.scaling_factor),
+            )
+        )
+
+        return pretrained_model
+
+
+ISO_C_Merge = IsotropicMergingInCommonSubspace  # alias
+ISO_CTS_Merge = IsotropicMergingInCommonAndTaskSubspace  # alias
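A minimal usage sketch for the two classes above, assuming fusion-bench 0.2.10 is installed; the model pool that run() consumes is configuration-specific and is therefore only named here, not constructed:

    from fusion_bench.method import ISO_C_Merge, ISO_CTS_Merge
    from fusion_bench.method.isotropic_merging import (
        IsotropicMergingInCommonAndTaskSubspace,
        IsotropicMergingInCommonSubspace,
    )

    # The short names exported above are plain aliases of the full class names.
    assert ISO_C_Merge is IsotropicMergingInCommonSubspace
    assert ISO_CTS_Merge is IsotropicMergingInCommonAndTaskSubspace

    # Same defaults as the shipped YAML configs (iso_c.yaml / iso_cts.yaml, listed below).
    iso_c_algo = ISO_C_Merge(scaling_factor=1.0, exclude_keys=None)
    iso_cts_algo = ISO_CTS_Merge(
        scaling_factor=1.0, common_space_fraction=0.8, exclude_keys=None
    )

    # Each algorithm is then run against a BaseModelPool that provides the pretrained
    # model and its fine-tuned variants, e.g.:
    #   merged_model = iso_c_algo.run(modelpool)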
@@ -0,0 +1,176 @@
+import math
+from typing import List
+
+import torch
+
+from fusion_bench.utils import timeit_context
+from fusion_bench.utils.type import StateDictType
+
+
+def iso_c(
+    task_vectors: List[StateDictType],
+    accelerator="cuda",
+    exclude_keys: List[str] = None,
+) -> StateDictType:
+    exclude_keys = [] if exclude_keys is None else exclude_keys
+
+    with torch.no_grad(), timeit_context("ISO-C Merging"):
+        new_vector = {}
+        for key in task_vectors[0]:
+            print(f"Merging {key}...")
+            original_device = task_vectors[0][key].device
+            tvs = [
+                task_vector[key].to(device=accelerator, non_blocking=True)
+                for task_vector in task_vectors
+            ]
+            num_tvs = len(tvs)
+            new_vector[key] = sum(tvs) / num_tvs
+            del tvs  # free memory
+
+            if len(task_vectors[0][key].shape) == 2 and key not in exclude_keys:
+                # if the key is a 2D matrix, we need to merge the task vectors in the common space
+                new_vector[key] *= num_tvs
+                U, S, V = torch.linalg.svd(new_vector[key], full_matrices=False)
+                S_mean = torch.ones_like(S) * S.mean()
+
+                new_vector[key] = torch.linalg.multi_dot(
+                    (
+                        U,
+                        torch.diag(S_mean),
+                        V,
+                    )
+                )
+            new_vector[key] = new_vector[key].to(
+                device=original_device, non_blocking=True
+            )
+    return new_vector
+
+
+@torch.no_grad()
+def iso_cts(
+    task_vectors: List[StateDictType],
+    common_space_fraction: float,
+    accelerator: str = "cuda",
+    exclude_keys: List[str] = None,
+):
+    exclude_keys = [] if exclude_keys is None else exclude_keys
+    new_vector = {}
+
+    print("ISO-CTS Merging")
+    for key in task_vectors[0]:
+        shape_ = task_vectors[0][key].shape
+        original_device = task_vectors[0][key].device
+        is_2d_matrix = (len(shape_) == 2) and (key not in exclude_keys)
+        if not is_2d_matrix:
+            print(f"Combining by avg {key}...")
+            for i, task_vector in enumerate(task_vectors):
+                vec = task_vector[key].to(device=accelerator, non_blocking=True)
+                if i == 0:
+                    new_vector[key] = vec.clone()
+                else:
+                    new_vector[key] += (vec - new_vector[key]) / (i + 1)
+
+            # move the new vector to the original device
+            new_vector[key] = new_vector[key].to(
+                device=original_device, non_blocking=True
+            )
+            continue
+
+        print(f"Computing common space using sum for {key}...")
+        combined_w = sum(
+            [
+                task_vector[key].to(device=accelerator, non_blocking=True)
+                for task_vector in task_vectors
+            ]
+        )
+
+        ### Calculate the common space size (making sure that task specific space is equally divisible) ###
+        common_space_index_s = int(min(shape_) * common_space_fraction)
+        _task_specific_total_space_index_s = round(
+            (min(shape_) - common_space_index_s) / len(task_vectors)
+        ) * len(task_vectors)
+        common_space_index_s = min(shape_) - _task_specific_total_space_index_s
+
+        u, s, v = torch.linalg.svd(combined_w, full_matrices=False)
+        common_space_u = u[:, :common_space_index_s]
+        common_space_s = s[:common_space_index_s]
+        common_space_v = v[:common_space_index_s, :]
+        ###################################################################
+
+        ### Calculate task specific space ###
+        n_dims_per_task = int((min(shape_) - common_space_index_s) / len(task_vectors))
+        for i, task_vector in enumerate(task_vectors):
+            w = task_vector[key].to(device=accelerator)
+
+            # calculate the projection onto task specific space to remove the common space
+            w_ts = w - common_space_u @ common_space_u.T @ w
+            u_ts, s_ts, v_ts = torch.linalg.svd(w_ts, full_matrices=False)
+
+            if i == 0:
+                combined_space_u = torch.zeros_like(u_ts, device=accelerator)
+                combined_space_s = torch.zeros_like(s_ts, device=accelerator)
+                combined_space_v = torch.zeros_like(v_ts, device=accelerator)
+
+            combined_space_u[:, i * n_dims_per_task : (i + 1) * n_dims_per_task] = u_ts[
+                :, :n_dims_per_task
+            ]
+            combined_space_s[i * n_dims_per_task : (i + 1) * n_dims_per_task] = s_ts[
+                :n_dims_per_task
+            ]
+            combined_space_v[i * n_dims_per_task : (i + 1) * n_dims_per_task, :] = v_ts[
+                :n_dims_per_task, :
+            ]
+        ###################################################################
+
+        combined_space_u[
+            :,
+            len(task_vectors) * n_dims_per_task : len(task_vectors) * n_dims_per_task
+            + common_space_index_s,
+        ] = common_space_u
+        combined_space_s[
+            len(task_vectors) * n_dims_per_task : len(task_vectors) * n_dims_per_task
+            + common_space_index_s
+        ] = common_space_s
+        combined_space_v[
+            len(task_vectors) * n_dims_per_task : len(task_vectors) * n_dims_per_task
+            + common_space_index_s,
+            :,
+        ] = common_space_v
+
+        ### Orthogonalize combined_space_u and combined_space_v ###
+        u_combined_space_u, s_combined_space_u, v_combined_space_u = torch.linalg.svd(
+            combined_space_u, full_matrices=False
+        )
+        u_combined_space_v, s_combined_space_v, v_combined_space_v = torch.linalg.svd(
+            combined_space_v, full_matrices=False
+        )
+        combined_space_u = u_combined_space_u @ v_combined_space_u
+        combined_space_v = u_combined_space_v @ v_combined_space_v
+        ###################################################################
+
+        combined_space_s = torch.ones_like(combined_space_s) * combined_space_s.mean()
+
+        new_vector[key] = torch.linalg.multi_dot(
+            (
+                combined_space_u,
+                torch.diag(combined_space_s),
+                combined_space_v,
+            )
+        )
+        new_vector[key] = new_vector[key].to(device=original_device, non_blocking=True)
+
+    return new_vector
+
+
+def check_parameterNamesMatch(checkpoints):
+    parameter_names = set(checkpoints[0].keys())
+
+    if len(checkpoints) >= 2:
+        # raise ValueError("Number of models is less than 2.")
+        for checkpoint in checkpoints[1:]:
+            current_parameterNames = set(checkpoint.keys())
+            if current_parameterNames != parameter_names:
+                raise ValueError(
+                    "Differing parameter names in models. "
+                    f"The different parameters are {parameter_names.symmetric_difference(current_parameterNames)}"
+                )
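To make the SVD step in iso_c concrete, here is a small self-contained check in plain PyTorch (no fusion_bench imports); it mirrors the 2D branch above and verifies that the merged matrix ends up with a flat, isotropic spectrum:

    import torch

    torch.manual_seed(0)

    # Pretend these are the task vectors of one 2D weight from three fine-tuned models.
    task_vectors = [torch.randn(8, 6) for _ in range(3)]

    # iso_c on a 2D key: sum the task vectors, then replace every singular value
    # of the sum by the mean singular value.
    combined = sum(task_vectors)
    U, S, V = torch.linalg.svd(combined, full_matrices=False)
    flattened = torch.linalg.multi_dot((U, torch.diag(torch.ones_like(S) * S.mean()), V))

    # The singular directions of the sum are kept, but every direction now
    # carries the same energy.
    new_S = torch.linalg.svdvals(flattened)
    print(S)      # decaying spectrum of the summed task vectors
    print(new_S)  # all values equal to S.mean(), up to float error
    assert torch.allclose(new_S, S.mean() * torch.ones_like(new_S), atol=1e-4)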
@@ -9,15 +9,20 @@ fusion_bench \
 ```
 """

-from typing import List, Optional
+from typing import List, Optional, Union, Iterable

 import torch
 from torch import Tensor, nn
+from omegaconf import ListConfig

 from fusion_bench import BaseAlgorithm
 from fusion_bench.mixins import LightningFabricMixin
 from fusion_bench.utils import timeit_context
-from fusion_bench.utils.state_dict_arithmetic import state_dict_sub, state_dict_add
+from fusion_bench.utils.state_dict_arithmetic import (
+    state_dict_add,
+    state_dict_sub,
+    state_dict_mul,
+)
 from fusion_bench.utils.type import StateDictType

 from .utils import (
@@ -33,9 +38,11 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):

     def __init__(
         self,
+        alpha: Union[float, Iterable[float]] = None,
         remove_keys: Optional[List[str]] = None,
         **kwargs,
     ):
+        self.alpha = alpha
         self.remove_keys = remove_keys if remove_keys is not None else []
         super().__init__(**kwargs)

@@ -50,6 +57,14 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):

         with timeit_context("Flattening out Checkpoints"):
             task_vectors = [state_dict_sub(check, ptm_check) for check in ft_checks]
+            if isinstance(self.alpha, Iterable):
+                assert len(self.alpha) == len(
+                    task_vectors
+                ), "Alpha and task vectors must have the same length"
+                task_vectors = [
+                    state_dict_mul(state_dict=tv, scalar=alpha)
+                    for alpha, tv in zip(self.alpha, task_vectors)
+                ]

         new_merged_tv = TSVM_utils.compute_and_sum_svd_mem_reduction(
             task_vectors,
@@ -57,6 +72,11 @@ class TaskSingularVectorMerging(BaseAlgorithm, LightningFabricMixin):
             accelerator=self.fabric.device,
         )

+        # If alpha is a float, we need to scale the new merged task vector by alpha
+        if self.alpha is not None and isinstance(self.alpha, float):
+            print(f"Scaling new merged task vector by alpha: {self.alpha}")
+            new_merged_tv = state_dict_mul(state_dict=new_merged_tv, scalar=self.alpha)
+
         pretrained_model.load_state_dict(
             state_dict_add(new_merged_tv, pretrained_model.state_dict())
         )
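The two alpha branches added above do different things: an iterable pre-scales each task vector before the SVD-based merge, while a single float rescales the merged task vector once, after compute_and_sum_svd_mem_reduction. A rough sketch of just that scaling logic, using hypothetical single-key state dicts and the same state_dict_mul keyword form as the code above (fusion-bench assumed installed):

    import torch
    from fusion_bench.utils.state_dict_arithmetic import state_dict_mul

    task_vectors = [{"w": torch.ones(2, 2)}, {"w": 2 * torch.ones(2, 2)}]

    # alpha as a list: one scale per task vector, applied before merging.
    alpha = [1.0, 0.5]
    task_vectors = [
        state_dict_mul(state_dict=tv, scalar=a) for a, tv in zip(alpha, task_vectors)
    ]
    print(task_vectors[1]["w"])  # the second task vector is halved back to all-ones

    # alpha as a float: a single scale applied to the merged task vector.
    merged_tv = {"w": 3 * torch.ones(2, 2)}  # stand-in for the TSVM merge output
    merged_tv = state_dict_mul(state_dict=merged_tv, scalar=0.5)
    print(merged_tv["w"])  # all entries 1.5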
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: fusion_bench
-Version: 0.2.9
+Version: 0.2.10
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 License: MIT License
@@ -15,7 +15,7 @@ fusion_bench/constants/__init__.py,sha256=Pyc4dLbl6oNduOCdnpeXQ9LDyVoIrkdl9eZ_l2
 fusion_bench/constants/paths.py,sha256=DVZyQ9FLhkyUdw6ARpXUCAMf_B8hFyJ6UNI-oYly3pE,591
 fusion_bench/dataset/__init__.py,sha256=OJiYmcqz0Vm5O7mE4PB5QFJeL_KjrsseQTRsQATGTm4,1050
 fusion_bench/dataset/clip_dataset.py,sha256=XLpCOiXlLEP3DffAlBn4P2PpUenbEFl-Yk9MNy6nbbI,2790
-fusion_bench/dataset/fer2013.py,sha256=bAdujQSj1PcUVFlKJgqcHAuE9AWz7JE1fzZ6scFVvmc,403
+fusion_bench/dataset/fer2013.py,sha256=Lub_xVhHfqaiPprvOsDVspJNioh1FjSrkhn3gL_UXDA,404
 fusion_bench/dataset/gpt2_glue.py,sha256=Qq1ZkEIQsTjj8tImvkZDNlduocSYwlEfVrDReZqDWdw,8761
 fusion_bench/dataset/gsm8k.py,sha256=CmANZ0A89PfPwVu_myKhXk1D9IwypOpjH3iqDo1KxcQ,2233
 fusion_bench/dataset/image_dataset.py,sha256=MSZE_UESyRRQDwnkm2KpyIARUg9SWcwqnH4fDNstzS4,1870
@@ -41,7 +41,7 @@ fusion_bench/dataset/llama/stanford_shp.py,sha256=6ueXKnFXIBBobacU1h5WxGLZrSOtBk
 fusion_bench/dataset/llama/ultrachat.py,sha256=Go7WvrDAYnm184fdazHGRYLbSY6Xd7jrESyQeUJtOww,1736
 fusion_bench/dataset/llama/wikitext.py,sha256=9ZHR-nMfXRumd3o-PIj3n7B83YlVeqpGkZ2zJs2B-9Y,2883
 fusion_bench/dataset/llama/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-fusion_bench/method/__init__.py,sha256=Hx_e9afWildQmZsHvInllReP1ACN2LfF8MjQqb3gkKc,6228
+fusion_bench/method/__init__.py,sha256=py1hn-gRqwzESwS5KvDapclyUvIW42r0nDsuINgFBB4,6601
 fusion_bench/method/base_algorithm.py,sha256=5dutGZfPqNhO8F8FOlo3UFR91TZu2Xj7O0pTB40JvWo,1135
 fusion_bench/method/dummy.py,sha256=hb1y6LR_geRZ5eRgGwt5zJUcHYorCeIbs5i76CvurUc,1031
 fusion_bench/method/ensemble.py,sha256=rGxvJTeorfcBuE_e0XO-0-MAc9un7ZCC46ikKGuAcN4,3077
@@ -65,7 +65,7 @@ fusion_bench/method/analysis/task_vector_cos_similarity.py,sha256=pL-XsWTo258yZT
 fusion_bench/method/analysis/task_vector_violin_plot.py,sha256=ie8hPl6QsVz9MQ6C2OEpzIBxQnmVKNf1FPc5bThmQGM,7606
 fusion_bench/method/classification/__init__.py,sha256=emB06UOMDHK5pfQ1WuvLG9Fm0aEEtZxSjpVw8fVE0fM,167
 fusion_bench/method/classification/clip_finetune.py,sha256=DlV1isp8vz6jwXNYQ6zbblAoUfnssL-WBpDeaXI5BVw,15727
-fusion_bench/method/classification/continual_clip_finetune.py,sha256=v_2tmkb92okNbKRiW7AYdGEA7y2wQUwEjD01aeI99v0,11530
+fusion_bench/method/classification/continual_clip_finetune.py,sha256=OLhZKS-6aCnafevZkZYcNMKTWDDj3DATB27eZl_i8EY,11530
 fusion_bench/method/concrete_subspace/__init__.py,sha256=yjadcpquHZbeZYsbfYhe2JlX46kObfiWJRsIoVcOEg4,223
 fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py,sha256=90_0HkOIl0XQG89xMa0UiBhrwfV2YqfLxlS04AouR3o,24755
 fusion_bench/method/concrete_subspace/clip_concrete_task_arithmetic.py,sha256=Nx-3AiAeIt5zmcC21Ta2_-4cAQg9hOWvThurXNZzA-w,10580
@@ -85,6 +85,9 @@ fusion_bench/method/fisher_merging/__init__.py,sha256=KWsjrtxKkPYwcUA5rB_6UNIqve
 fusion_bench/method/fisher_merging/clip_fisher_merging.py,sha256=QCutGqjkfW3OWETPZsCChqLRAhvfJp4QKD9TGSpTyV0,7635
 fusion_bench/method/fisher_merging/fisher_merging.py,sha256=CPU-tJiDv9FCIBYl7Pn0zA5cdRB1Md5kWchRDlJgly0,20456
 fusion_bench/method/fisher_merging/gpt2_fisher_merging.py,sha256=LZmz41jZ5dSsAHxfOUpr3u2rlCgUPTDR7xMsIlQM-jc,7576
+fusion_bench/method/isotropic_merging/__init__.py,sha256=Bg12OiltvZLMmZm066quvtG0LOWSVqI5RggYeaMDGFA,585
+fusion_bench/method/isotropic_merging/iso.py,sha256=GILofZQiTcOnJRQ28RmzOjqkso5Xih9WuFuB2JDWA_M,3773
+fusion_bench/method/isotropic_merging/iso_utils.py,sha256=7L8PYUIJROwHJQmhFY-tdEhkLAnzVKXr-ae55FQ1QSo,6928
 fusion_bench/method/linear/__init__.py,sha256=ChfkoOEAb-rUKwpowFPel-a1hRfS8gCrbnWD-jlRbe4,283
 fusion_bench/method/linear/expo.py,sha256=LCHTWlsPm1Mjhrq0mfpWLVC7skkI9ZksGduy3TxULoU,3939
 fusion_bench/method/linear/linear_interpolation.py,sha256=IONw9BPiRJouY8bE9Abfyz7qVI_1B1n8KGZa0f7Pza8,2157
@@ -151,7 +154,7 @@ fusion_bench/method/tall_mask/utils.py,sha256=Wlp8WcPwR_lCaBIZ9rgG6ewLfSzz3G7kPk
 fusion_bench/method/task_arithmetic/__init__.py,sha256=pSx_NV5Ra_6UXpyYWCi6ANQoAnEtymZt_X1dDN9wT4Y,96
 fusion_bench/method/task_arithmetic/task_arithmetic.py,sha256=1D0uuNtqyA1VS35jh6AnEVsX72HnT02THyerck_lmso,5441
 fusion_bench/method/task_singular_vector/TSVC.py,sha256=yn4SrZNvtA6PoGYJmbmtNeDyDbGnRCgfZ7ZCg914AZU,410
-fusion_bench/method/task_singular_vector/TSVM.py,sha256=ANBGC1GM8c9oy_xlY-ZEyoWO9mnZh5aiF_rrvVH73l8,1925
+fusion_bench/method/task_singular_vector/TSVM.py,sha256=2MqeJazsZNBTKghrtZDqXE2XoO_BShK60n3SEMjV74k,2787
 fusion_bench/method/task_singular_vector/__init__.py,sha256=WMucyl9pu_Ev2kcdrfT4moqMMbzD7hHQVFME5Su5jMA,298
 fusion_bench/method/task_singular_vector/utils/TSVC_utils.py,sha256=FytKbal48EW6iGIA-2zV7QSVbYTVflXr4Mr56q0W75k,2286
 fusion_bench/method/task_singular_vector/utils/TSVM_utils.py,sha256=dsTMQ15zFJ1MPqDOt2TJ01O9Bwq_klyG9xL9hRD2aI0,27521
@@ -484,6 +487,8 @@ fusion_bench_config/method/ensemble/weighted_ensemble.yaml,sha256=U_wQXtogtgiqOT
 fusion_bench_config/method/fisher_merging/clip_fisher_merging.yaml,sha256=rl7kfVvdo2pG-DnglQUbjzkyBqnq1FpfoSDSjFtdLwk,633
 fusion_bench_config/method/fisher_merging/fisher_merging.yaml,sha256=B1wrv9mhaOID4KcAUEMZNxlvY3tR3Q3UGualFslvx-Y,475
 fusion_bench_config/method/fisher_merging/gpt2_fisher_merging.yaml,sha256=AE7XZqRDj4__J_ipEcjPs7qTB2J3xLQyFRlq1W4iHFE,563
+fusion_bench_config/method/isotropic_merging/iso_c.yaml,sha256=Lh_OtTaUJ08--h85fUr2asF85xLe1NMCK8fVAhHOzdQ,82
+fusion_bench_config/method/isotropic_merging/iso_cts.yaml,sha256=x5vZo__kO8njl4_gFdXnOt15X_qFLv6-diSWHOR4clw,111
 fusion_bench_config/method/linear/expo.yaml,sha256=St3NW6cKVRV3vCn8y0gxQ8k66VTdtsLTEWQTbO9wQ0Y,420
 fusion_bench_config/method/linear/linear_interpolation.yaml,sha256=IQgltk5REITSx8xLuLP11ByPbuMgy7dHz_BrxIgwOas,67
 fusion_bench_config/method/linear/llama_expo.yaml,sha256=SEsC-l5gugY0vlsQkTJqzVgWJnMjFzWuTz814UKbFeM,624
@@ -515,7 +520,7 @@ fusion_bench_config/method/sparselo_pruning/llama_iterative_sparselo.yaml,sha256
 fusion_bench_config/method/sparselo_pruning/llama_pcp_sparselo.yaml,sha256=w1OWb38nW08K_hvrRMsCwmRxHWLGQfSSXg5nTiYaP8E,635
 fusion_bench_config/method/sparselo_pruning/llama_sparselo.yaml,sha256=J6vYIwqzh95-B3ekDias3FnCrVr4sig4zxpWyvz8hZ0,613
 fusion_bench_config/method/surgery/adamerging_surgery.yaml,sha256=Ne9JlJFgsRYcygBNCOBSN1ygBcLkE6I-8yusfTxyg-Y,826
-fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml,sha256=Se2v7AwwGqulXEVktRRzznpba4nNrWegY2bOwvjrHG8,74
+fusion_bench_config/method/task_singular_vector/TaskSingularVectorMerging.yaml,sha256=CLONjN9TXQ0OQwZHaje0q3WJWxR3LD1b5q5KrWJfZIA,169
 fusion_bench_config/method/trust_region/clip_task_arithmetic.yaml,sha256=mK09Ohsvj0Q6suj5qJM4DyCzRy192QBt4wjHS6W29IY,197
 fusion_bench_config/method/wemoe/sparse_weight_ensembling_moe.yaml,sha256=jiAco7M1XO0aekHFZKLKlXL_jRoCA8bgGD44Z7iB208,1001
 fusion_bench_config/method/wemoe/weight_ensembling_moe.yaml,sha256=OEv5yhyUCe5lXeT2PyXC49yrHXEM7i8SZDw6IQRDtAE,620
@@ -719,9 +724,9 @@ fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_sun397
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-single-task_svhn.yaml,sha256=2AqMiNCRRunLIrssHvFzu1lUzOaQn8uOHM9yjrQq-_A,109
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_rankone_wemoe_clip-vit-classification_TA8.yaml,sha256=iQMj2VpDTe_D8OfCo94w5Ud2MON-EGa0DzVr6UmphrA,436
 fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip_sparse_wemoe_clip-vit-classification_TA8.yaml,sha256=i5Bn8bLl2cgqvrgtIGmoovUfSMehk_m-6C2wwcx5JMU,435
-fusion_bench-0.2.9.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
-fusion_bench-0.2.9.dist-info/METADATA,sha256=CvYwT5a-5o-Xbj9-HEgnl3gGAhNvss7Z1YggC8rYZcU,16779
-fusion_bench-0.2.9.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-fusion_bench-0.2.9.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
-fusion_bench-0.2.9.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
-fusion_bench-0.2.9.dist-info/RECORD,,
+fusion_bench-0.2.10.dist-info/LICENSE,sha256=nhnOJlw4CPuPVE0qvkGmxfFgHmKi-6nzXvTu8t0NUdg,1066
+fusion_bench-0.2.10.dist-info/METADATA,sha256=kBYozBf6hgA-7ebsn7znqJdhCz4H0dJSv2jVIEkBvyA,16780
+fusion_bench-0.2.10.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+fusion_bench-0.2.10.dist-info/entry_points.txt,sha256=iUQ8MCJvda7HP4vYh2n1Teoapb4G9PBVYZkAfcc5SHU,116
+fusion_bench-0.2.10.dist-info/top_level.txt,sha256=BuO4TL6iHL_2yPBUX9-LlIrHRczA_BNMIFwweK0PQEI,13
+fusion_bench-0.2.10.dist-info/RECORD,,
@@ -0,0 +1,4 @@
+_target_: fusion_bench.method.ISO_C_Merge
+
+scaling_factor: 1.0
+exclude_keys: null
@@ -0,0 +1,5 @@
+_target_: fusion_bench.method.ISO_CTS_Merge
+
+scaling_factor: 1.0
+common_space_fraction: 0.8
+exclude_keys: null
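Both new YAML files follow the Hydra `_target_` convention used throughout fusion_bench_config, so they resolve directly to the classes added in iso.py. A minimal sketch of that resolution, assuming hydra-core is available (the inline dict mirrors iso_cts.yaml rather than loading the file from disk):

    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    cfg = OmegaConf.create(
        {
            "_target_": "fusion_bench.method.ISO_CTS_Merge",
            "scaling_factor": 1.0,
            "common_space_fraction": 0.8,
            "exclude_keys": None,
        }
    )
    algorithm = instantiate(cfg)
    print(type(algorithm).__name__)  # IsotropicMergingInCommonAndTaskSubspace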
@@ -1,2 +1,8 @@
 _target_: fusion_bench.method.TaskSingularVectorMerging
 remove_keys: null
+
+# alpha is a float or a list of floats
+# example:
+# alpha: 1
+# alpha: [1, 0.5, 0.25]
+alpha: 1