fusion-bench 0.2.19__py3-none-any.whl → 0.2.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. fusion_bench/__init__.py +1 -0
  2. fusion_bench/_get_started/__init__.py +3 -0
  3. fusion_bench/_get_started/greeting_program.py +49 -0
  4. fusion_bench/compat/method/base_algorithm.py +14 -0
  5. fusion_bench/constants/__init__.py +5 -0
  6. fusion_bench/constants/clip_vision.py +26 -2
  7. fusion_bench/constants/paths.py +4 -0
  8. fusion_bench/dataset/clip_dataset.py +2 -1
  9. fusion_bench/dataset/gpt2_glue.py +9 -9
  10. fusion_bench/dataset/image_corruption/__init__.py +0 -0
  11. fusion_bench/dataset/image_corruption/make_corruption.py +179 -0
  12. fusion_bench/dataset/image_dataset.py +1 -1
  13. fusion_bench/dataset/nyuv2.py +2 -2
  14. fusion_bench/method/__init__.py +16 -1
  15. fusion_bench/method/adamerging/clip_layer_wise_adamerging.py +1 -1
  16. fusion_bench/method/adamerging/clip_task_wise_adamerging.py +11 -7
  17. fusion_bench/method/adamerging/layer_wise_adamerging.py +11 -5
  18. fusion_bench/method/base_algorithm.py +195 -12
  19. fusion_bench/method/bitdelta/__init__.py +4 -0
  20. fusion_bench/method/bitdelta/bitdelta.py +156 -0
  21. fusion_bench/method/bitdelta/bitdelta_utils/__init__.py +0 -0
  22. fusion_bench/method/bitdelta/bitdelta_utils/binary_gemm_kernel.py +462 -0
  23. fusion_bench/method/bitdelta/bitdelta_utils/data.py +35 -0
  24. fusion_bench/method/bitdelta/bitdelta_utils/diff.py +129 -0
  25. fusion_bench/method/concrete_subspace/clip_concrete_adamerging.py +0 -1
  26. fusion_bench/method/depth_upscaling/depth_upscaling.py +4 -9
  27. fusion_bench/method/doge_ta/clip_layer_wise_adamerging.py +4 -5
  28. fusion_bench/method/doge_ta/doge_ta.py +1 -1
  29. fusion_bench/method/ensemble.py +12 -12
  30. fusion_bench/method/expert_sparsity/utils/calibration_data.py +1 -1
  31. fusion_bench/method/fisher_merging/clip_fisher_merging.py +2 -2
  32. fusion_bench/method/fisher_merging/fisher_merging.py +6 -15
  33. fusion_bench/method/fisher_merging/gpt2_fisher_merging.py +3 -10
  34. fusion_bench/method/fw_merging/fw_hard.py +1 -1
  35. fusion_bench/method/fw_merging/fw_soft.py +1 -1
  36. fusion_bench/method/gossip/clip_layer_wise_gossip.py +4 -5
  37. fusion_bench/method/linear/expo.py +2 -1
  38. fusion_bench/method/linear/linear_interpolation.py +6 -4
  39. fusion_bench/method/linear/simple_average_for_llama.py +16 -6
  40. fusion_bench/method/lm_finetune/bradley_terry_rm.py +2 -2
  41. fusion_bench/method/mixture_of_experts/mixtral_upcycling.py +9 -26
  42. fusion_bench/method/model_recombination.py +2 -5
  43. fusion_bench/method/moe_pruner/hooks/__init__.py +1 -2
  44. fusion_bench/method/moe_pruner/utils/data.py +2 -1
  45. fusion_bench/method/moe_pruner/utils/prune.py +6 -1
  46. fusion_bench/method/pruning/llama_magnitude_prune.py +1 -1
  47. fusion_bench/method/pruning/wanda_utils/data.py +1 -2
  48. fusion_bench/method/pwe_moe/clip_pwe_moe.py +12 -34
  49. fusion_bench/method/randes/modelsoup.py +1 -3
  50. fusion_bench/method/regmean/clip_regmean.py +2 -2
  51. fusion_bench/method/regmean/gpt2_regmean.py +3 -10
  52. fusion_bench/method/regmean/regmean.py +2 -11
  53. fusion_bench/method/regmean_plusplus/__init__.py +3 -0
  54. fusion_bench/method/regmean_plusplus/clip_regmean_plusplus.py +199 -0
  55. fusion_bench/method/regmean_plusplus/regmean_plusplus.py +383 -0
  56. fusion_bench/method/simple_average.py +16 -4
  57. fusion_bench/method/slerp/slerp.py +5 -2
  58. fusion_bench/method/smile_upscaling/error_accumulation.py +177 -0
  59. fusion_bench/method/smile_upscaling/projected_energy.py +145 -0
  60. fusion_bench/method/smile_upscaling/smile_qwen2_upscaling.py +39 -28
  61. fusion_bench/method/smile_upscaling/smile_upscaling.py +12 -5
  62. fusion_bench/method/tall_mask/task_arithmetic.py +3 -11
  63. fusion_bench/method/task_arithmetic/task_arithmetic.py +6 -10
  64. fusion_bench/method/ties_merging/ties_merging.py +13 -26
  65. fusion_bench/method/we_moe/clip_we_moe.py +5 -4
  66. fusion_bench/method/we_moe/we_moe.py +6 -6
  67. fusion_bench/method/weighted_average/llama.py +4 -16
  68. fusion_bench/metrics/continual_learning/__init__.py +1 -0
  69. fusion_bench/metrics/continual_learning/backward_transfer.py +1 -1
  70. fusion_bench/metrics/nyuv2/__init__.py +2 -2
  71. fusion_bench/metrics/nyuv2/segmentation.py +1 -1
  72. fusion_bench/mixins/__init__.py +10 -2
  73. fusion_bench/mixins/clip_classification.py +4 -3
  74. fusion_bench/mixins/hydra_config.py +105 -7
  75. fusion_bench/mixins/lightning_fabric.py +2 -0
  76. fusion_bench/mixins/serialization.py +265 -48
  77. fusion_bench/modelpool/__init__.py +2 -2
  78. fusion_bench/modelpool/base_pool.py +29 -9
  79. fusion_bench/modelpool/causal_lm/causal_lm.py +9 -0
  80. fusion_bench/modelpool/clip_vision/modelpool.py +43 -12
  81. fusion_bench/modelpool/seq_classification_lm/__init__.py +1 -1
  82. fusion_bench/modelpool/seq_classification_lm/seq_classification_lm.py +1 -1
  83. fusion_bench/models/__init__.py +2 -1
  84. fusion_bench/models/expert_sparsity/mixtral/__init__.py +1 -1
  85. fusion_bench/models/hf_utils.py +182 -0
  86. fusion_bench/models/linearized/linearized_model_utils.py +4 -4
  87. fusion_bench/models/linearized/vision_model.py +1 -1
  88. fusion_bench/models/modeling_deepseek_v2/__init__.py +1 -1
  89. fusion_bench/models/modeling_deepseek_v2/modeling_deepseek.py +4 -4
  90. fusion_bench/models/modeling_deepseek_v2/tokenization_deepseek_fast.py +0 -1
  91. fusion_bench/models/modeling_smile_gemma2/__init__.py +9 -0
  92. fusion_bench/models/modeling_smile_gemma2/configuration_smile_gemma2.py +20 -0
  93. fusion_bench/models/modeling_smile_gemma2/modeling_smile_gemma2.py +986 -0
  94. fusion_bench/models/modeling_smile_gemma2/register.py +26 -0
  95. fusion_bench/models/modeling_smile_llama/__init__.py +0 -0
  96. fusion_bench/models/modeling_smile_llama/configuration_smile_llama.py +20 -0
  97. fusion_bench/models/modeling_smile_llama/modeling_smile_llama.py +705 -0
  98. fusion_bench/models/modeling_smile_llama/register.py +8 -0
  99. fusion_bench/models/modeling_smile_mistral/__init__.py +5 -47
  100. fusion_bench/models/modeling_smile_qwen2/__init__.py +1 -1
  101. fusion_bench/models/modeling_smile_qwen2/modeling_smile_qwen2.py +6 -7
  102. fusion_bench/models/modeling_smile_qwen2/register.py +1 -4
  103. fusion_bench/models/parameter_dict.py +1 -1
  104. fusion_bench/models/sparse_we_moe.py +1 -53
  105. fusion_bench/models/utils.py +26 -0
  106. fusion_bench/models/we_moe.py +1 -53
  107. fusion_bench/models/wrappers/ensemble.py +6 -4
  108. fusion_bench/models/wrappers/layer_wise_fusion.py +1 -1
  109. fusion_bench/models/wrappers/task_wise_fusion.py +250 -72
  110. fusion_bench/programs/base_program.py +81 -2
  111. fusion_bench/programs/fabric_fusion_program.py +24 -8
  112. fusion_bench/scripts/cli.py +6 -6
  113. fusion_bench/taskpool/base_pool.py +4 -3
  114. fusion_bench/taskpool/clip_vision/taskpool.py +34 -18
  115. fusion_bench/taskpool/dummy.py +1 -1
  116. fusion_bench/taskpool/lm_eval_harness/taskpool.py +1 -2
  117. fusion_bench/tasks/clip_classification/__init__.py +6 -4
  118. fusion_bench/utils/__init__.py +6 -1
  119. fusion_bench/utils/devices.py +14 -4
  120. fusion_bench/utils/instantiate_utils.py +3 -1
  121. fusion_bench/utils/misc.py +48 -2
  122. fusion_bench/utils/modelscope.py +265 -0
  123. fusion_bench/utils/parameters.py +2 -2
  124. fusion_bench/utils/rich_utils.py +3 -0
  125. fusion_bench/utils/state_dict_arithmetic.py +34 -27
  126. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/METADATA +31 -24
  127. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/RECORD +189 -153
  128. fusion_bench_config/_get_started/clip_evaluate_single_model.yaml +21 -0
  129. fusion_bench_config/_get_started/clip_simple_average.yaml +23 -0
  130. fusion_bench_config/_get_started/clip_task_arithmetic.yaml +24 -0
  131. fusion_bench_config/_get_started/greeting_program.yaml +4 -0
  132. fusion_bench_config/fabric/loggers/csv_logger.yaml +3 -3
  133. fusion_bench_config/fabric/loggers/tensorboard_logger.yaml +3 -3
  134. fusion_bench_config/fabric_model_fusion.yaml +45 -17
  135. fusion_bench_config/hydra/default.yaml +6 -2
  136. fusion_bench_config/llama_full_finetune.yaml +1 -0
  137. fusion_bench_config/method/adamerging/clip.yaml +1 -1
  138. fusion_bench_config/method/bitdelta/bitdelta.yaml +12 -0
  139. fusion_bench_config/method/depth_upscaling.yaml +4 -1
  140. fusion_bench_config/method/regmean/clip_regmean.yaml +1 -1
  141. fusion_bench_config/method/regmean_plusplus/clip_regmean_plusplus.yaml +11 -0
  142. fusion_bench_config/method/smile_upscaling/error_accumulation.yaml +5 -0
  143. fusion_bench_config/method/smile_upscaling/projected_energy.yaml +2 -0
  144. fusion_bench_config/method/smile_upscaling/smile_qwen2_upscaling.yaml +1 -0
  145. fusion_bench_config/modelpool/CLIPVisionModelPool/_template.yaml +1 -4
  146. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20.yaml +73 -8
  147. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch16_TALL20_model_only.yaml +27 -7
  148. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8.yaml +34 -4
  149. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_control_task.yaml +14 -17
  150. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TA8_model_only.yaml +14 -3
  151. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL10.yaml +39 -5
  152. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL12.yaml +49 -5
  153. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14.yaml +55 -5
  154. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL14_model_only.yaml +21 -4
  155. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL16.yaml +61 -5
  156. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL18.yaml +67 -5
  157. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20.yaml +73 -5
  158. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_TALL20_model_only.yaml +26 -3
  159. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_individual.yaml +4 -9
  160. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_finetuned.yaml +7 -5
  161. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_single_task_projection.yaml +6 -10
  162. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_cars.yaml +6 -7
  163. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_and_dtd.yaml +6 -7
  164. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_sun397_cars_and_dtd.yaml +7 -8
  165. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_svhn_and_mnist.yaml +8 -6
  166. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-base-patch32_two_tasks_control_task.yaml +4 -6
  167. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8.yaml +32 -7
  168. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TA8_model_only.yaml +14 -6
  169. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20.yaml +73 -8
  170. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_TALL20_model_only.yaml +27 -7
  171. fusion_bench_config/modelpool/CLIPVisionModelPool/clip-vit-large-patch14_individual.yaml +6 -10
  172. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-1.5B_math_and_coder.yaml +2 -2
  173. fusion_bench_config/modelpool/CausalLMPool/Qwen2.5-7B-math_and_coder.yaml +9 -0
  174. fusion_bench_config/modelpool/CausalLMPool/mistral-7b.yaml +6 -0
  175. fusion_bench_config/modelpool/CausalLMPool/mixtral_moe_merging.yaml +10 -0
  176. fusion_bench_config/modelpool/CausalLMPool/qwen2_math_1.5B_and_R1.yaml +4 -12
  177. fusion_bench_config/modelpool/CausalLMPool/simle_mixtral_exp_v4.yaml +6 -16
  178. fusion_bench_config/modelpool/CausalLMPool/vicuna-7b-v1.5.yaml +8 -0
  179. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/llama_preference700k.yaml +1 -1
  180. fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/single_reward_model.yaml +1 -1
  181. fusion_bench_config/nyuv2_config.yaml +3 -1
  182. fusion_bench_config/nyuv2_mtl_train.yaml +1 -0
  183. fusion_bench_config/path/default.yaml +28 -0
  184. fusion_bench_config/taskpool/CLIPVisionModelTaskPool/clip-vit-base-patch32_svhn_and_mnist.yaml +24 -0
  185. fusion_bench_config/method/adamerging.yaml +0 -23
  186. fusion_bench_config/modelpool/mixtral_moe_merging.yaml +0 -14
  187. fusion_bench_config/modelpool/mixtral_moe_upscaling.yaml +0 -6
  188. fusion_bench_config/taskpool/clip-vit-base-patch32_svhn_and_mnist.yaml +0 -22
  189. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/WHEEL +0 -0
  190. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/entry_points.txt +0 -0
  191. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/licenses/LICENSE +0 -0
  192. {fusion_bench-0.2.19.dist-info → fusion_bench-0.2.21.dist-info}/top_level.txt +0 -0
  193. /fusion_bench_config/modelpool/{SeqenceClassificationModelPool → SequenceClassificationModelPool}/roberta-base_glue.yaml +0 -0
@@ -6,7 +6,7 @@ import torch
6
6
  from torch import nn
7
7
 
8
8
  from fusion_bench.method.base_algorithm import BaseAlgorithm
9
- from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
9
+ from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
10
10
  from fusion_bench.modelpool import BaseModelPool
11
11
  from fusion_bench.utils import LazyStateDict
12
12
  from fusion_bench.utils.state_dict_arithmetic import (
@@ -59,12 +59,20 @@ def simple_average(
59
59
  return state_dict_avg(modules)
60
60
 
61
61
 
62
+ @auto_register_config
62
63
  class SimpleAverageAlgorithm(
63
64
  BaseAlgorithm,
64
65
  SimpleProfilerMixin,
65
66
  ):
67
+ def __init__(self, show_pbar: bool = False, **kwargs):
68
+ """
69
+ Args:
70
+ show_pbar (bool): If True, shows a progress bar during model loading and merging. Default is False.
71
+ """
72
+ super().__init__(**kwargs)
73
+
66
74
  @torch.no_grad()
67
- def run(self, modelpool: Union[BaseModelPool, Dict[str, nn.Module]]):
75
+ def run(self, modelpool: Union[BaseModelPool, Dict[str, nn.Module]]) -> nn.Module:
68
76
  """
69
77
  Fuse the models in the given model pool using simple averaging.
70
78
 
@@ -100,10 +108,14 @@ class SimpleAverageAlgorithm(
100
108
  forward_model = model
101
109
  else:
102
110
  # Add the current model's state dictionary to the accumulated state dictionary
103
- sd = state_dict_add(sd, model.state_dict(keep_vars=True))
111
+ sd = state_dict_add(
112
+ sd, model.state_dict(keep_vars=True), show_pbar=self.show_pbar
113
+ )
104
114
  with self.profile("merge weights"):
105
115
  # Divide the accumulated state dictionary by the number of models to get the average
106
- sd = state_dict_div(sd, len(modelpool.model_names))
116
+ sd = state_dict_div(
117
+ sd, len(modelpool.model_names), show_pbar=self.show_pbar
118
+ )
107
119
 
108
120
  if isinstance(forward_model, LazyStateDict):
109
121
  # if the model is a LazyStateDict, convert it to an empty module
@@ -1,10 +1,13 @@
1
1
  import logging
2
+ from typing import Any, Dict
2
3
 
3
4
  import torch
5
+ from torch import nn
4
6
  from typing_extensions import override
5
7
 
6
8
  from fusion_bench.method import BaseAlgorithm
7
9
  from fusion_bench.modelpool import BaseModelPool
10
+ from fusion_bench.utils.type import StateDictType
8
11
 
9
12
  from .slerp_utils import slerp
10
13
 
@@ -18,7 +21,7 @@ def slerp_on_state_dicts(
18
21
  *,
19
22
  DOT_THRESHOLD: float = 0.9995,
20
23
  epsilon: float = 1e-8,
21
- ):
24
+ ) -> StateDictType:
22
25
  """
23
26
  Perform spherical linear interpolation (slerp) on the state dictionaries of two models.
24
27
 
@@ -72,7 +75,7 @@ class SlerpMergeAlgorithm(BaseAlgorithm):
72
75
  super().__init__()
73
76
 
74
77
  @override
75
- def run(self, modelpool: BaseModelPool):
78
+ def run(self, modelpool: BaseModelPool) -> nn.Module:
76
79
  """
77
80
  Run the SlerpMergeAlgorithm on the given model pool.
78
81
 
@@ -0,0 +1,177 @@
1
+ import os
2
+ from typing import Literal, cast
3
+
4
+ import pandas as pd
5
+ import torch
6
+ from omegaconf import DictConfig
7
+ from torch import nn
8
+ from torch.utils.data import DataLoader
9
+ from tqdm import tqdm
10
+ from transformers import CLIPVisionModel
11
+
12
+ from fusion_bench import BaseAlgorithm, BaseModelPool, auto_register_config
13
+ from fusion_bench.dataset import CLIPDataset
14
+ from fusion_bench.method import SmileUpscalingAlgorithm
15
+ from fusion_bench.mixins import LightningFabricMixin, SimpleProfilerMixin
16
+ from fusion_bench.modelpool import CLIPVisionModelPool
17
+ from fusion_bench.taskpool.clip_vision.taskpool import LayerWiseFeatureSaver
18
+ from fusion_bench.utils.devices import clear_cuda_cache
19
+
20
+
21
@auto_register_config
class LowRankApproximation(BaseAlgorithm):
    """Replace each fine-tuned linear weight with a low-rank approximation.

    For every ``nn.Linear`` module of every model in the pool, the
    fine-tuning update ``W_ft - W_base`` is truncated to its top-``rank``
    singular components and added back onto the base weight, so each task
    model keeps only a rank-``rank`` delta from the pretrained model.
    """

    def __init__(self, rank: int, device: str = "cuda", **kwargs):
        """Low-rank approximation of fine-tuned updates.

        Args:
            rank (int): Number of singular components kept per linear layer.
            device (str): Device used for the SVD computation. Default "cuda".
        """
        # `rank` and `device` are bound to `self` by @auto_register_config.
        super().__init__(**kwargs)

    def run(self, modelpool: BaseModelPool):
        """Return ``{model_name: model}`` with low-rank-approximated updates.

        The returned models are the pool's task models modified in place.
        """
        base_model = modelpool.load_pretrained_model()

        models = {}
        for model_name in tqdm(modelpool.model_names, "processing models"):
            task_model = modelpool.load_model(model_name)
            for module_name, module in task_model.named_modules():
                if isinstance(module, nn.Linear):
                    w = cast(
                        nn.Linear, base_model.get_submodule(module_name)
                    ).weight.to(dtype=torch.float32, device=self.device, copy=True)
                    w_ft = module.weight.to(
                        dtype=torch.float32, device=self.device, copy=True
                    )

                    # Compute the low-rank approximation of the update.
                    # `full_matrices=False` (reduced SVD) is sufficient since
                    # only the top-`rank` components are kept below, and it
                    # avoids materializing the full square U/V factors for
                    # rectangular weight matrices.
                    w_diff = w_ft - w
                    u, s, vh = torch.linalg.svd(w_diff, full_matrices=False)
                    v = vh.T

                    u = u[:, : self.rank]
                    s = s[: self.rank]
                    v = v[:, : self.rank]

                    low_rank_w_diff = torch.linalg.multi_dot((u, torch.diag(s), v.T))
                    low_rank_w = w + low_rank_w_diff

                    # Write the approximated weight back in the module's
                    # original dtype and device.
                    module.weight.data = low_rank_w.to(
                        dtype=module.weight.dtype,
                        device=module.weight.device,
                    )

            models[model_name] = task_model
        return models
62
+
63
+
64
@auto_register_config
class ErrorAccumulationAnalysisForCLIP(
    LightningFabricMixin,
    BaseAlgorithm,
):
    """Collect per-layer hidden states of CLIP vision encoders to study how
    approximation error accumulates across layers.

    For each task model in the pool, features are captured (via forward
    hooks) from three variants: the original fine-tuned model, the
    SMILE-upscaled model, and a plain low-rank-approximated model, and saved
    under ``self.log_dir`` for offline comparison.
    """

    def __init__(
        self,
        gate_k: int,
        k: int,
        seed: int = 42,
        top_k: int = 1,
        dataset_kwargs: DictConfig = None,
        max_samples: int = 1024,
        **kwargs,
    ):
        """
        Args:
            gate_k: Rank of the SMILE router (forwarded to SmileUpscalingAlgorithm).
            k: Rank of the SMILE experts and of the low-rank baseline.
            seed: Seed used before each feature-collection pass so every model
                variant sees the same data samples.
            top_k: Number of experts activated per token in the SMILE model.
            dataset_kwargs: DataLoader keyword arguments (batch_size, num_workers, ...).
            max_samples: Stop collecting features after this many samples.
        """
        # NOTE(review): constructor arguments are presumably bound to `self`
        # by @auto_register_config; when `dataset_kwargs` is provided the
        # attribute is assumed to be set there — confirm.
        super().__init__(**kwargs)
        if dataset_kwargs is None:
            # Default loader settings when none are configured.
            self.dataset_kwargs = DictConfig(
                {
                    "batch_size": 32,
                    "num_workers": 4,
                }
            )

    def run(self, modelpool: CLIPVisionModelPool):
        """Build the SMILE and low-rank models, then dump per-layer features
        for each task's finetuned / smile / low-rank variants."""
        assert self.fabric.world_size == 1, "Distributed inference is not supported."
        # get the smile model
        smile_algorithm = SmileUpscalingAlgorithm(
            gate_k=self.gate_k, k=self.k, top_k=self.top_k, device=self.fabric.device
        )
        smile_model = smile_algorithm.run(modelpool)
        # get low-rank models
        low_rank_models = LowRankApproximation(rank=self.k).run(modelpool)

        # NOTE(review): `results` is initialized but never populated or saved
        # in this method — looks like dead code or a placeholder for future
        # aggregation; confirm before relying on any CSV output here.
        results = {
            "model_name": [],
            "method": [],
            "layer_index": [],
            "approximation_error": [],
        }

        for model_name in modelpool.model_names:
            dataset = modelpool.load_test_dataset(model_name)
            processor = modelpool.load_processor()
            dataset = CLIPDataset(dataset, processor)
            dataloader = DataLoader(dataset, shuffle=True, **self.dataset_kwargs)
            dataloader = self.fabric.setup_dataloaders(dataloader)

            # finetuned_model
            finetuned_model = modelpool.load_model(model_name)
            finetuned_model = self.to_device(finetuned_model)
            self.collect_hidden_states(
                finetuned_model,
                dataloader=dataloader,
                model_name=f"{model_name}/finetuned",
            )
            # Free GPU memory before loading the next variant.
            del finetuned_model
            clear_cuda_cache()

            # smile model
            smile_model = self.to_device(smile_model)
            self.collect_hidden_states(
                smile_model, dataloader=dataloader, model_name=f"{model_name}/smile"
            )
            # The SMILE model is reused across tasks, so it is moved to CPU
            # rather than deleted.
            smile_model.cpu()
            clear_cuda_cache()

            # low-rank models
            model = low_rank_models.pop(model_name)
            model = self.to_device(model)
            self.collect_hidden_states(
                model, dataloader=dataloader, model_name=f"{model_name}/low-rank"
            )
            del model
            clear_cuda_cache()

            del dataloader
            clear_cuda_cache()

    @torch.no_grad()
    def collect_hidden_states(
        self, model: CLIPVisionModel, dataloader, model_name: str
    ):
        """Run up to ``self.max_samples`` images through ``model`` and save
        each encoder layer's output features to
        ``{log_dir}/{model_name}/layer_{i}.pth``.

        Returns:
            Dict mapping layer index to its ``LayerWiseFeatureSaver`` hook
            (features already saved to disk).
        """
        self.fabric.seed_everything(
            self.seed, workers=True
        )  # make sure to get same data samples
        # register hooks
        hooks = {}
        hook_handles = {}
        for i, layer in enumerate(model.vision_model.encoder.layers):
            hooks[i] = LayerWiseFeatureSaver(
                save_path=os.path.join(self.log_dir, model_name, f"layer_{i}.pth"),
                first_token_only=True,
            )
            hook_handles[i] = layer.register_forward_hook(hooks[i])

        # forward pass
        num_total_samples = 0
        for images, _ in tqdm(dataloader, desc=f"Collecting features for {model_name}"):
            batch_size = images.size(0)
            model(images)
            num_total_samples += batch_size
            if num_total_samples >= self.max_samples:
                break

        # save features
        for i, hook in hooks.items():
            hook.save_features()

        # remove hooks
        for i, hook_handle in hook_handles.items():
            hook_handle.remove()

        return hooks
@@ -0,0 +1,145 @@
1
+ import os
2
+ from typing import Literal
3
+
4
+ import pandas as pd
5
+ import torch
6
+
7
+ from fusion_bench import BaseAlgorithm, BaseModelPool, auto_register_config
8
+ from fusion_bench.mixins import LightningFabricMixin, SimpleProfilerMixin
9
+
10
+ from tqdm import tqdm
11
+
12
+
13
class ProjectedEnergyAnalysis(
    SimpleProfilerMixin,
    LightningFabricMixin,
    BaseAlgorithm,
):
    """Measure how much of each fine-tuning update lies inside subspaces
    spanned by the pretrained weight's singular vectors.

    For every ``nn.Linear`` in every task model, the update
    ``W_ft - W_base`` is projected onto:
      I   — the span of the top-k left/right singular vectors of ``W_base``;
      II  — the remaining singular directions of the reduced SVD;
      II+III — the same, using the full SVD (includes the orthogonal
               complement for non-square weights).
    The "projected energy" is the squared Frobenius norm of the projection
    divided by that of the full update. Results are written to a CSV file
    in ``self.log_dir``.
    """

    def on_run_start(self):
        # Run all SVD/projection computations on the fabric-selected device.
        self.device = self.fabric.device

    def run(self, modelpool: BaseModelPool):
        """Analyze every linear layer of every task model and save
        ``projected_energy_analysis.csv``. Returns None."""
        with self.profile("model loading"):
            base_model = modelpool.load_pretrained_model()

        # Columns of the output CSV; filled one row per linear module.
        results = {
            "model_name": [],
            "module_index": [],
            "module_name": [],
            "projected_energy_I": [],
            "projected_energy_II": [],
            "projected_energy_II_III": [],
        }
        for model_name in tqdm(
            modelpool.model_names,
            "analyzing",
            dynamic_ncols=True,
        ):
            with self.profile("model loading"):
                finetuned_model = modelpool.load_model(model_name)

            module_index = 0
            for module_name, base_module in tqdm(
                list(base_model.named_modules()),
                "analyzing modules",
                dynamic_ncols=True,
            ):
                if isinstance(base_module, torch.nn.Linear):
                    with self.profile("weight analysis"):
                        _result = self.analyze_weight(
                            base_module.weight,
                            finetuned_model.get_submodule(module_name).weight,
                        )
                    results["model_name"].append(model_name)
                    results["module_index"].append(module_index)
                    results["module_name"].append(module_name)
                    for key, value in _result.items():
                        results[key].append(value)

                    module_index += 1

        # save results as csv
        results = pd.DataFrame(results)
        results.to_csv(
            os.path.join(self.log_dir, "projected_energy_analysis.csv"), index=True
        )

        self.print_profile_summary()
        return None

    @torch.no_grad()
    def analyze_weight(self, w: torch.Tensor, w_ft: torch.Tensor, k: int = -1):
        """Compute the three projected-energy ratios for one weight pair.

        Args:
            w: Pretrained weight matrix.
            w_ft: Fine-tuned weight matrix (same shape as ``w``).
            k: Subspace cut-off rank. If negative, chosen as the smallest k
                whose singular values account for over 50% of the total sum.

        Returns:
            Dict with keys ``projected_energy_I``, ``projected_energy_II``
            and ``projected_energy_II_III`` (Python floats).
        """
        w = w.to(dtype=torch.float32, device=self.device)
        w_ft = w_ft.to(dtype=torch.float32, device=self.device)
        w_diff = w_ft - w

        # Perform analysis on the weight tensor
        u, s, vh = torch.linalg.svd(w, full_matrices=False)
        v = vh.T
        if k < 0:
            # find the position where the sum of singular values is larger than 50% of the total sum
            cumsum = s.cumsum(0)
            k = (cumsum < cumsum[-1] * 0.5).sum().item() + 1

        # subspace I
        w_diff_proj = self._project_subspace_low(u=u, s=s, v=v, k=k, w=w, w_ft=w_ft)
        projected_energy_I = (
            torch.linalg.norm(w_diff_proj, ord="fro") ** 2
            / torch.linalg.norm(w_diff, ord="fro") ** 2
        )

        # subspace II
        w_diff_proj = self._project_subspace_high(u=u, s=s, v=v, k=k, w=w, w_ft=w_ft)
        projected_energy_II = (
            torch.linalg.norm(w_diff_proj, ord="fro") ** 2
            / torch.linalg.norm(w_diff, ord="fro") ** 2
        )

        ## subspace II+III
        # Recompute with full_matrices=True so u/v include the orthogonal
        # complement of the column/row space (subspace III).
        u, s, vh = torch.linalg.svd(w, full_matrices=True)
        v = vh.T
        w_diff_proj = self._project_subspace_high(u=u, s=s, v=v, k=k, w=w, w_ft=w_ft)
        projected_energy_II_III = (
            torch.linalg.norm(w_diff_proj, ord="fro") ** 2
            / torch.linalg.norm(w_diff, ord="fro") ** 2
        )

        return {
            "projected_energy_I": projected_energy_I.item(),
            "projected_energy_II": projected_energy_II.item(),
            "projected_energy_II_III": projected_energy_II_III.item(),
        }

    def _project_subspace_low(
        self,
        u: torch.Tensor,
        s: torch.Tensor,
        v: torch.Tensor,
        k: int,
        w: torch.Tensor,
        w_ft: torch.Tensor,
    ):
        """Project ``w_ft - w`` onto the span of the top-k singular vectors:
        ``U_k U_k^T (W_ft - W) V_k V_k^T``. ``s`` is sliced for symmetry with
        the caller but not otherwise used."""
        u = u[:, :k]
        s = s[:k]
        v = v[:, :k]

        w_diff = w_ft - w
        w_diff_proj = torch.linalg.multi_dot((u, u.T, w_diff, v, v.T))
        return w_diff_proj

    def _project_subspace_high(
        self,
        u: torch.Tensor,
        s: torch.Tensor,
        v: torch.Tensor,
        k: int,
        w: torch.Tensor,
        w_ft: torch.Tensor,
    ):
        """Project ``w_ft - w`` onto the span of the singular vectors beyond
        rank k: ``U_{>k} U_{>k}^T (W_ft - W) V_{>k} V_{>k}^T``."""
        u = u[:, k:]
        s = s[k:]
        v = v[:, k:]

        w_diff = w_ft - w
        w_diff_proj = torch.linalg.multi_dot((u, u.T, w_diff, v, v.T))
        return w_diff_proj
@@ -16,10 +16,16 @@ from transformers.models.qwen2.modeling_qwen2 import Qwen2DecoderLayer
16
16
 
17
17
  from fusion_bench import BaseAlgorithm, BaseModelPool
18
18
  from fusion_bench.compat.modelpool import to_modelpool
19
- from fusion_bench.mixins import SimpleProfilerMixin
19
+ from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
20
+ from fusion_bench.modelpool import CausalLMPool
21
+ from fusion_bench.models.hf_utils import (
22
+ generate_complete_readme,
23
+ save_pretrained_with_remote_code,
24
+ )
20
25
  from fusion_bench.models.modeling_smile_qwen2 import (
21
26
  SmileQwen2Config,
22
27
  SmileQwen2ForCausalLM,
28
+ SmileQwen2Model,
23
29
  )
24
30
  from fusion_bench.models.modeling_smile_qwen2.modeling_smile_qwen2 import (
25
31
  SmileQwen2DecoderLayer,
@@ -34,6 +40,7 @@ from fusion_bench.utils.parameters import print_parameters
34
40
  log = logging.getLogger(__name__)
35
41
 
36
42
 
43
+ @auto_register_config
37
44
  class SmileQwen2UpscalingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
38
45
  R"""
39
46
  SmileQwen2UpscalingAlgorithm is a model fusion algorithm designed to upscale
@@ -49,15 +56,7 @@ class SmileQwen2UpscalingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
49
56
  Merges the pretrained model with the fine-tuned models to create an upscaled model.
50
57
  """
51
58
 
52
- _config_mapping = BaseAlgorithm._config_mapping | {
53
- "device": "device",
54
- "accelerator": "accelerator",
55
- "model_path": "model_path",
56
- "model_dtype": "model_dtype",
57
- "num_experts_per_tok": "num_experts_per_tok",
58
- "rank_of_router": "rank_of_router",
59
- "rank_of_expert": "rank_of_expert",
60
- }
59
+ modelpool: CausalLMPool
61
60
 
62
61
  def __init__(
63
62
  self,
@@ -68,20 +67,13 @@ class SmileQwen2UpscalingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
68
67
  num_experts_per_tok,
69
68
  rank_of_router,
70
69
  rank_of_expert,
70
+ save_with_remote_code: bool = True,
71
71
  **kwargs,
72
72
  ):
73
- self.device = device
74
- self.accelerator = accelerator
75
- self.model_path = model_path
76
- self.model_dtype = model_dtype
77
- # SmileMoE parameters, except `num_local_experts` which is set later according to the number of finetuned models
78
- self.num_experts_per_tok = num_experts_per_tok
79
- self.rank_of_router = rank_of_router
80
- self.rank_of_expert = rank_of_expert
81
73
  super().__init__(**kwargs)
82
74
 
83
75
  @torch.no_grad()
84
- def run(self, modelpool: BaseModelPool) -> SmileQwen2ForCausalLM:
76
+ def run(self, modelpool) -> SmileQwen2ForCausalLM:
85
77
  """
86
78
  Executes the upscaling process.
87
79
 
@@ -129,13 +121,29 @@ class SmileQwen2UpscalingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
129
121
  if os.path.dirname(config.model_path):
130
122
  os.makedirs(os.path.dirname(config.model_path), exist_ok=True)
131
123
  log.info(f"Saving model to {config.model_path}")
132
- pretrained_model_config = self.modelpool.get_model_config("_pretrained_")
133
- pretrained_path = pretrained_model_config.get(
134
- "path", pretrained_model_config["pretrained_model_name_or_path"]
135
- )
136
- tokenizer = AutoTokenizer.from_pretrained(pretrained_path)
124
+ tokenizer = self.modelpool.load_tokenizer()
137
125
  tokenizer.save_pretrained(config.model_path)
138
- model.save_pretrained(config.model_path)
126
+ if not self.save_with_remote_code:
127
+ model.save_pretrained(config.model_path)
128
+ else:
129
+ save_pretrained_with_remote_code(
130
+ model,
131
+ auto_map={
132
+ "AutoConfig": SmileQwen2Config,
133
+ "AutoModel": SmileQwen2Model,
134
+ "AutoModelForCausalLM": SmileQwen2ForCausalLM,
135
+ },
136
+ save_directory=config.model_path,
137
+ )
138
+
139
+ # save readme
140
+ complete_readme = generate_complete_readme(
141
+ algorithm=self,
142
+ modelpool=modelpool,
143
+ models=[modelpool.get_model_path(m) for m in modelpool.all_model_names],
144
+ )
145
+ with open(os.path.join(config.model_path, "README.md"), "w") as f:
146
+ f.write(complete_readme)
139
147
 
140
148
  return model
141
149
 
@@ -158,9 +166,12 @@ class SmileQwen2UpscalingAlgorithm(BaseAlgorithm, SimpleProfilerMixin):
158
166
 
159
167
  with init_empty_weights():
160
168
  pretrained_model_config = self.modelpool.get_model_config("_pretrained_")
161
- pretrained_path = pretrained_model_config.get(
162
- "path", pretrained_model_config["pretrained_model_name_or_path"]
163
- )
169
+ if isinstance(pretrained_model_config, str):
170
+ pretrained_path = pretrained_model_config
171
+ else:
172
+ pretrained_path = pretrained_model_config.get(
173
+ "path", pretrained_model_config["pretrained_model_name_or_path"]
174
+ )
164
175
  base_config = AutoConfig.from_pretrained(pretrained_path)
165
176
  model_config = SmileQwen2Config(
166
177
  num_experts_per_tok=config.num_experts_per_tok,
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  import os
3
3
  from copy import deepcopy
4
- from typing import Dict, List, Tuple # noqa: F401
4
+ from typing import Any, Dict, List, Tuple # noqa: F401
5
5
 
6
6
  import torch
7
7
  import torch.nn.functional as F
@@ -21,6 +21,7 @@ from fusion_bench.models.smile_moe.linear_from_module import (
21
21
  )
22
22
  from fusion_bench.models.utils import get_attr, set_attr
23
23
  from fusion_bench.utils.parameters import print_parameters
24
+ from fusion_bench.utils.devices import get_device
24
25
 
25
26
  log = logging.getLogger(__name__)
26
27
 
@@ -54,7 +55,7 @@ class SmileUpscalingAlgorithm(
54
55
  routing_use_diff: bool = True,
55
56
  average_experts: bool = False,
56
57
  model_path: str = None,
57
- **kwargs,
58
+ **kwargs: Any,
58
59
  ):
59
60
  """
60
61
  Initialize the SmileUpscalingAlgorithm.
@@ -91,7 +92,7 @@ class SmileUpscalingAlgorithm(
91
92
  print(f"=== Config for `{type(self).__name__}` ===")
92
93
 
93
94
  @torch.no_grad()
94
- def run(self, modelpool: BaseModelPool):
95
+ def run(self, modelpool: BaseModelPool) -> nn.Module:
95
96
  """
96
97
  Executes the upscaling process.
97
98
 
@@ -142,7 +143,7 @@ class SmileUpscalingAlgorithm(
142
143
  pretrained_model: nn.Module,
143
144
  finetuned_models: List[nn.Module],
144
145
  in_place: bool = True,
145
- ):
146
+ ) -> nn.Module:
146
147
  """
147
148
  Merges the pretrained model with the fine-tuned models to create an upscaled model.
148
149
 
@@ -180,7 +181,12 @@ class SmileUpscalingAlgorithm(
180
181
 
181
182
  name_list = name.split(".")
182
183
  module = get_attr(pretrained_model, name_list)
183
- experts = [get_attr(m, name_list) for m in finetuned_models]
184
+ original_device = get_device(module)
185
+ module = module.to(self.device, non_blocking=True)
186
+ experts = [
187
+ get_attr(m, name_list).to(self.device, non_blocking=True)
188
+ for m in finetuned_models
189
+ ]
184
190
  try:
185
191
  moe_linear = SmileMoELinear(
186
192
  module,
@@ -192,6 +198,7 @@ class SmileUpscalingAlgorithm(
192
198
  full_matrices=self.full_matrices,
193
199
  upscaling_accelerator=self.upscaling_accelerator,
194
200
  )
201
+ moe_linear = moe_linear.to(original_device, non_blocking=True)
195
202
  except ExpertNotTrainedError:
196
203
  print(f"skip {name} because the experts are not trained.")
197
204
  return
@@ -9,7 +9,7 @@ from copy import deepcopy
9
9
  import torch
10
10
 
11
11
  from fusion_bench import BaseAlgorithm
12
- from fusion_bench.mixins import SimpleProfilerMixin
12
+ from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
13
13
  from fusion_bench.modelpool import BaseModelPool
14
14
  from fusion_bench.utils.state_dict_arithmetic import (
15
15
  state_dict_add,
@@ -58,16 +58,11 @@ def generate_task_masks(
58
58
  return final_mask
59
59
 
60
60
 
61
+ @auto_register_config
61
62
  class TallMaskTaskArithmeticAlgorithm(
62
- BaseAlgorithm,
63
63
  SimpleProfilerMixin,
64
+ BaseAlgorithm,
64
65
  ):
65
- _config_mapping = BaseAlgorithm._config_mapping | {
66
- "tall_mask_lambda": "tall_mask_lambda",
67
- "debug": "debug",
68
- "verbose": "verbose",
69
- }
70
-
71
66
  def __init__(
72
67
  self,
73
68
  tall_mask_lambda: float,
@@ -76,9 +71,6 @@ class TallMaskTaskArithmeticAlgorithm(
76
71
  **kwargs,
77
72
  ):
78
73
  super().__init__(**kwargs)
79
- self.tall_mask_lambda = tall_mask_lambda
80
- self.debug = debug
81
- self.verbose = verbose
82
74
 
83
75
  @torch.no_grad()
84
76
  def run(self, modelpool: BaseModelPool):
@@ -12,7 +12,7 @@ import torch
12
12
  from torch import nn
13
13
 
14
14
  from fusion_bench.method.base_algorithm import BaseAlgorithm
15
- from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
15
+ from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
16
16
  from fusion_bench.modelpool import BaseModelPool
17
17
  from fusion_bench.utils.state_dict_arithmetic import (
18
18
  state_dict_add,
@@ -74,9 +74,10 @@ def task_arithmetic_merge(
74
74
  return pretrained_model
75
75
 
76
76
 
77
+ @auto_register_config
77
78
  class TaskArithmeticAlgorithm(
78
- BaseAlgorithm,
79
79
  SimpleProfilerMixin,
80
+ BaseAlgorithm,
80
81
  ):
81
82
  """
82
83
  Task Arithmetic Algorithm for model fusion.
@@ -89,22 +90,17 @@ class TaskArithmeticAlgorithm(
89
90
  scaling_factor (int): The factor by which the task vectors will be scaled before merging.
90
91
  """
91
92
 
92
- _config_mapping = BaseAlgorithm._config_mapping | {
93
- "scaling_factor": "scaling_factor"
94
- }
95
-
96
- def __init__(self, scaling_factor: int):
93
+ def __init__(self, scaling_factor: int, **kwargs):
97
94
  """
98
95
  Initializes the TaskArithmeticAlgorithm with the given scaling factor.
99
96
 
100
97
  Args:
101
98
  scaling_factor (int): The factor by which the task vectors will be scaled before merging.
102
99
  """
103
- self.scaling_factor = scaling_factor
104
- super().__init__()
100
+ super().__init__(**kwargs)
105
101
 
106
102
  @torch.no_grad()
107
- def run(self, modelpool: Union[BaseModelPool, Dict[str, nn.Module]]):
103
+ def run(self, modelpool: Union[BaseModelPool, Dict[str, nn.Module]]) -> nn.Module:
108
104
  """
109
105
  Runs the Task Arithmetic Algorithm to fuse models in the given model pool.
110
106