PyPI - fusion-bench - Versions diffs - 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl - Mend

fusion-bench 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77) hide show

fusion_bench/method/linear/{task_arithmetic_for_llama.py → task_arithmetic_for_causallm.py} RENAMED Viewed

@@ -1,22 +1,27 @@
 import logging
+import os
 from typing import Dict, List, Mapping, Optional, TypeVar, Union  # noqa: F401
 from typing_extensions import override
-from fusion_bench import timeit_context
+from fusion_bench import auto_register_config, timeit_context
 from fusion_bench.method import TaskArithmeticAlgorithm
 from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
 from fusion_bench.modelpool import CausalLMBackbonePool, CausalLMPool
+from fusion_bench.models.hf_utils import create_default_model_card
 log = logging.getLogger(__name__)
-class TaskArithmeticForLlama(TaskArithmeticAlgorithm, SimpleProfilerMixin):
+@auto_register_config
+class TaskArithmeticForCausalLM(
+    TaskArithmeticAlgorithm,
+):
     R"""
     Examples:
     fusion_bench \
-        method=linear/task_arithmetic_for_llama \
+        method=linear/task_arithmetic_for_causallm \
             method.scaling_factor=0.3 \
         method.model_save_path=outputs/simle_mixtral_exp_v4/task_arithmetic_0.3 \
         modelpool=CausalLMPool/simle_mixtral_exp_v4.yaml
@@ -29,18 +34,14 @@ class TaskArithmeticForLlama(TaskArithmeticAlgorithm, SimpleProfilerMixin):
     def __init__(
         self,
         scaling_factor: float,
-        merge_backbone: bool,
+        merge_backbone: bool = False,
         model_save_path: Optional[str] = None,
+        **kwargs,
     ):
-        self.merge_backbone = merge_backbone
-        self.model_save_path = model_save_path
-        super().__init__(scaling_factor=scaling_factor)
+        super().__init__(scaling_factor=scaling_factor, **kwargs)
     @override
     def run(self, modelpool: CausalLMPool):
-        if self.model_save_path:
-            tokenizer = modelpool.load_tokenizer()
         if self.merge_backbone:
             assert modelpool.has_pretrained
             backbone_modelpool = CausalLMBackbonePool(**modelpool.config)
@@ -52,6 +53,15 @@ class TaskArithmeticForLlama(TaskArithmeticAlgorithm, SimpleProfilerMixin):
         if self.model_save_path is not None:
             with timeit_context(f"Saving the model to {self.model_save_path}"):
-                tokenizer.save_pretrained(self.model_save_path)
-                model.save_pretrained(self.model_save_path)
+                description = f"Merged model using task arithmetic with scaling factor {self.scaling_factor}."
+                modelpool.save_model(
+                    model=model,
+                    path=self.model_save_path,
+                    save_tokenizer=True,
+                    algorithm_config=self.config,
+                    description=description,
+                )
         return model
+TaskArithmeticForLlama = TaskArithmeticForCausalLM

fusion_bench/method/linear/ties_merging_for_causallm.py ADDED Viewed

@@ -0,0 +1,70 @@
+import logging
+import os
+from typing import Dict, List, Mapping, Optional, TypeVar, Union  # noqa: F401
+from typing_extensions import override
+from fusion_bench import auto_register_config, timeit_context
+from fusion_bench.method import TiesMergingAlgorithm
+from fusion_bench.mixins.simple_profiler import SimpleProfilerMixin
+from fusion_bench.modelpool import CausalLMBackbonePool, CausalLMPool
+from fusion_bench.models.hf_utils import create_default_model_card
+log = logging.getLogger(__name__)
+@auto_register_config
+class TiesMergingForCausalLM(
+    TiesMergingAlgorithm,
+):
+    R"""
+    TIES merging for CausalLM model pools.
+    Wraps `TiesMergingAlgorithm.run` with two extras: the merge can be limited to
+    the backbone (the result is then assigned to `model.model.layers` of the
+    pretrained model), and the merged model can be written to `model_save_path`
+    through `modelpool.save_model`.
+    """
+    _config_mapping = TiesMergingAlgorithm._config_mapping | {
+        "merge_backbone": "merge_backbone",
+    }
+    def __init__(
+        self,
+        scaling_factor: float,
+        threshold: float,
+        remove_keys: List[str] = None,
+        merge_func: str = "sum",
+        merge_backbone: bool = False,
+        model_save_path: Optional[str] = None,
+        **kwargs,
+    ):
+        # NOTE(review): `merge_backbone` and `model_save_path` are not assigned
+        # here, yet `run` reads them from `self`; presumably
+        # `@auto_register_config` stores the constructor arguments - confirm.
+        super().__init__(
+            scaling_factor=scaling_factor,
+            threshold=threshold,
+            remove_keys=remove_keys,
+            merge_func=merge_func,
+            **kwargs,
+        )
+    @override
+    def run(self, modelpool: CausalLMPool):
+        """
+        Merge the models of `modelpool` and optionally save the result.
+        Args:
+            modelpool: Pool holding the models to merge. When `merge_backbone`
+                is set it must contain a pretrained model.
+        Returns:
+            The merged model.
+        """
+        if not self.merge_backbone:
+            model = super().run(modelpool)
+        else:
+            assert modelpool.has_pretrained
+            backbone_modelpool = CausalLMBackbonePool(**modelpool.config)
+            model = modelpool.load_model("_pretrained_")
+            # Only the backbone is merged; it replaces the layers of the
+            # freshly loaded pretrained model.
+            model.model.layers = super().run(backbone_modelpool)
+        save_path = self.model_save_path
+        if save_path is None:
+            return model
+        with timeit_context(f"Saving the model to {save_path}"):
+            summary = f"Merged model using TIES merging with scaling factor {self.scaling_factor} and threshold {self.threshold}."
+            modelpool.save_model(
+                model=model,
+                path=save_path,
+                save_tokenizer=True,
+                algorithm_config=self.config,
+                description=summary,
+            )
+        return model

fusion_bench/method/opcm/opcm.py CHANGED Viewed

@@ -87,6 +87,7 @@ class OPCMForCLIP(
         # get the average model
         with self.profile("loading model"):
             merged_model = modelpool.load_model(model_names[0])
+            assert merged_model is not None, "Failed to load the first model"
         if self.evaluate_on_every_step:
             with self.profile("evaluating model"):

fusion_bench/method/pwe_moe/module.py CHANGED Viewed

@@ -13,8 +13,6 @@ import torch.func
 from torch import Tensor, nn
 from torch.nn import functional as F
-from fusion_bench.utils import join_list
 log = logging.getLogger(__name__)

fusion_bench/method/simple_average.py CHANGED Viewed

@@ -89,7 +89,7 @@ class SimpleAverageAlgorithm(
             modelpool = BaseModelPool(modelpool)
         log.info(
-            f"Fusing models using simple average on {len(modelpool.model_names)} models."
+            f"Fusing models using simple average on {len(modelpool.model_names)} models. "
             f"models: {modelpool.model_names}"
         )
         sd: Optional[StateDictType] = None
@@ -119,7 +119,7 @@ class SimpleAverageAlgorithm(
         if isinstance(forward_model, LazyStateDict):
             # if the model is a LazyStateDict, convert it to an empty module
-            forward_model = forward_model.meta_module.to_empty(
+            forward_model = deepcopy(forward_model.meta_module).to_empty(
                 device=forward_model._device
             )
         result = forward_model.load_state_dict(sd, strict=False)

fusion_bench/method/tall_mask/task_arithmetic.py CHANGED Viewed

@@ -15,7 +15,7 @@ from fusion_bench.utils.state_dict_arithmetic import (
     state_dict_add,
     state_dict_binary_mask,
     state_dict_diff_abs,
-    state_dict_hadmard_product,
+    state_dict_hadamard_product,
     state_dict_mul,
     state_dict_sub,
     state_dict_sum,
@@ -111,7 +111,7 @@ class TallMaskTaskArithmeticAlgorithm(
         with self.profile("compress and retrieve"):
             for model_name in modelpool.model_names:
-                retrieved_task_vector = state_dict_hadmard_product(
+                retrieved_task_vector = state_dict_hadamard_product(
                     tall_masks[model_name], multi_task_vector
                 )
                 retrieved_state_dict = state_dict_add(

fusion_bench/method/task_arithmetic/task_arithmetic.py CHANGED Viewed

@@ -6,11 +6,20 @@ http://arxiv.org/abs/2212.04089
 import logging
 from copy import deepcopy
-from typing import Dict, List, Mapping, Optional, TypeVar, Union  # noqa: F401
+from typing import (  # noqa: F401
+    TYPE_CHECKING,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    TypeVar,
+    Union,
+)
 import torch
 from torch import nn
+from fusion_bench import LazyStateDict
 from fusion_bench.method.base_algorithm import BaseAlgorithm
 from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
 from fusion_bench.modelpool import BaseModelPool
@@ -21,6 +30,8 @@ from fusion_bench.utils.state_dict_arithmetic import (
 )
 from fusion_bench.utils.type import StateDictType, TorchModelType
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel
 log = logging.getLogger(__name__)
@@ -125,25 +136,39 @@ class TaskArithmeticAlgorithm(
             with self.profile("merge weights"):
                 if task_vector is None:
                     task_vector = state_dict_sub(
-                        model.state_dict(keep_vars=True),
-                        pretrained_model.state_dict(keep_vars=True),
+                        model.state_dict(),
+                        pretrained_model.state_dict(),
                     )
                 else:
                     task_vector = state_dict_add(
                         task_vector,
                         state_dict_sub(
-                            model.state_dict(keep_vars=True),
-                            pretrained_model.state_dict(keep_vars=True),
+                            model.state_dict(),
+                            pretrained_model.state_dict(),
                         ),
                     )
         with self.profile("merge weights"):
             # scale the task vector
             task_vector = state_dict_mul(task_vector, self.config.scaling_factor)
             # add the task vector to the pretrained model
-            state_dict = state_dict_add(
-                pretrained_model.state_dict(keep_vars=True), task_vector
-            )
+            state_dict = state_dict_add(pretrained_model.state_dict(), task_vector)
         self.print_profile_summary()
-        pretrained_model.load_state_dict(state_dict)
-        return pretrained_model
+        # apply state dict to model
+        if isinstance(pretrained_model, nn.Module):
+            model = pretrained_model
+            model.load_state_dict(state_dict)
+        elif isinstance(pretrained_model, LazyStateDict):
+            model = deepcopy(pretrained_model.meta_module)
+            model = model.to_empty(device=pretrained_model._device)
+            result = model.load_state_dict(state_dict, strict=False)
+            if result.unexpected_keys:
+                raise ValueError(
+                    f"Unexpected keys in state dict: {result.unexpected_keys}"
+                )
+            if result.missing_keys:
+                log.warning(f"Missing keys in state dict: {result.missing_keys}")
+        else:
+            raise TypeError(f"Unsupported model type: {type(pretrained_model)}")
+        return model

fusion_bench/method/ties_merging/ties_merging.py CHANGED Viewed

@@ -9,11 +9,14 @@ Overview of Ties-Merging:
 """
 import logging
+from copy import deepcopy
 from typing import Any, Dict, List, Literal, Mapping, Union  # noqa: F401
 import torch
 from torch import Tensor, nn
+from transformers import PreTrainedModel
+from fusion_bench import LazyStateDict
 from fusion_bench.compat.modelpool import to_modelpool
 from fusion_bench.method import BaseAlgorithm
 from fusion_bench.mixins import SimpleProfilerMixin, auto_register_config
@@ -98,12 +101,25 @@ class TiesMergingAlgorithm(
                 merge_func=merge_func,
             )
             merged_check = flat_ptm + scaling_factor * merged_tv
-            merged_state_dict = vector_to_state_dict(
+            state_dict = vector_to_state_dict(
                 merged_check, ptm_check, remove_keys=remove_keys
             )
-            # Load the merged state dict into the pretrained model
-            pretrained_model.load_state_dict(merged_state_dict)
         self.print_profile_summary()
-        return pretrained_model
+        # apply state dict to model
+        if isinstance(pretrained_model, nn.Module):
+            model = pretrained_model
+            model.load_state_dict(state_dict)
+        elif isinstance(pretrained_model, LazyStateDict):
+            model = deepcopy(pretrained_model.meta_module)
+            model = model.to_empty(device=pretrained_model._device)
+            result = model.load_state_dict(state_dict, strict=False)
+            if result.unexpected_keys:
+                raise ValueError(
+                    f"Unexpected keys in state dict: {result.unexpected_keys}"
+                )
+            if result.missing_keys:
+                log.warning(f"Missing keys in state dict: {result.missing_keys}")
+        else:
+            raise TypeError(f"Unsupported model type: {type(pretrained_model)}")
+        return model

fusion_bench/method/wudi/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .wudi import WUDIMerging, wudi_merging

fusion_bench/method/wudi/wudi.py ADDED Viewed

@@ -0,0 +1,105 @@
+"""
+Whoever Started the Interference Should End It:  Guiding Data-Free Model Merging via Task Vectors
+Arxiv: http://arxiv.org/abs/2503.08099
+"""
+from typing import List
+import torch
+from tqdm import tqdm
+from fusion_bench import BaseAlgorithm, BaseModelPool, auto_register_config
+from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.utils import timeit_context
+from fusion_bench.utils.state_dict_arithmetic import state_dict_add, state_dict_sub
+def wudi_merging(
+    task_vectors: List[torch.Tensor],
+    accelerator="cuda",
+    iter_num: int = 300,
+    exclude_keys: List[str] = None,
+):
+    """
+    Merge several task vectors into one, key by key.
+    Entries whose tensor is 2-D, floating point and not listed in `exclude_keys`
+    start from the sum of the task vectors and are refined with Adam for
+    `iter_num` steps. Every other entry is the plain average of the task vectors.
+    Args:
+        task_vectors: One item per fine-tuned model. Despite the annotation, each
+            item is indexed by key, i.e. it is a mapping from parameter name to
+            tensor. All items are read with the keys of the first one.
+        accelerator: Device on which the per-key computation runs.
+        iter_num: Number of optimisation steps for each optimised entry.
+        exclude_keys: Keys that are averaged instead of optimised.
+    Returns:
+        A dict mapping each key to the merged tensor, detached from autograd and
+        moved back to the device of the first task vector's entry.
+    """
+    exclude_keys = [] if exclude_keys is None else exclude_keys
+    with timeit_context("WUDI Merging"):
+        new_vector = {}
+        for key in tqdm(task_vectors[0], desc="WUDI Merging", leave=False):
+            tqdm.write(f"key: {key}")
+            original_device = task_vectors[0][key].device
+            tvs = torch.stack(
+                [
+                    task_vector[key].to(device=accelerator, non_blocking=True)
+                    for task_vector in task_vectors
+                ]
+            )
+            num_tvs = len(tvs)
+            summed = torch.sum(tvs, dim=0)
+            # Only floating-point tensors can require gradients, so integer
+            # entries (e.g. counters) must never be wrapped in a Parameter.
+            should_optimize = (
+                len(task_vectors[0][key].shape) == 2
+                and key not in exclude_keys
+                and tvs.is_floating_point()
+            )
+            if should_optimize:
+                merged = torch.nn.Parameter(summed)
+                optimizer = torch.optim.Adam([merged], lr=1e-5, weight_decay=0)
+                l2_norms = torch.square(
+                    torch.norm(tvs.reshape(tvs.shape[0], -1), p=2, dim=-1)
+                )
+                for _ in tqdm(
+                    range(iter_num),
+                ):
+                    disturbing_vectors = merged.unsqueeze(0) - tvs
+                    product = torch.matmul(disturbing_vectors, tvs.transpose(1, 2))
+                    loss = torch.sum(
+                        torch.square(product) / l2_norms.unsqueeze(-1).unsqueeze(-1)
+                    )
+                    optimizer.zero_grad()
+                    loss.backward()
+                    optimizer.step()
+                # Hand back a plain tensor so callers do not inherit
+                # requires_grad or the autograd history of the optimisation.
+                merged = merged.detach()
+            else:
+                merged = summed / num_tvs
+            new_vector[key] = merged.to(device=original_device, non_blocking=True)
+    return new_vector
+@auto_register_config
+class WUDIMerging(
+    LightningFabricMixin,
+    BaseAlgorithm,
+):
+    """
+    Model merging with WUDI ("Whoever Started the Interference Should End It:
+    Guiding Data-Free Model Merging via Task Vectors").
+    Builds one task vector per fine-tuned model, merges them with `wudi_merging`
+    and adds the result to the pretrained weights.
+    """
+    def __init__(
+        self,
+        iter_num: int,
+        exclude_keys: List[str] = None,
+        **kwargs,
+    ):
+        # NOTE(review): `iter_num` and `exclude_keys` are not assigned here, yet
+        # `run` reads them from `self`; presumably `@auto_register_config`
+        # stores the constructor arguments - confirm.
+        super().__init__(**kwargs)
+    def run(self, modelpool: BaseModelPool):
+        """
+        Merge every model of `modelpool` into its pretrained model.
+        Args:
+            modelpool: Pool providing the pretrained model and the fine-tuned models.
+        Returns:
+            The pretrained model with the merged task vector added to its weights.
+        """
+        # Task vectors are plain weight differences; no gradients are needed here.
+        with torch.no_grad():
+            base_model = modelpool.load_pretrained_model()
+            deltas = []
+            for name in modelpool.model_names:
+                expert = modelpool.load_model(name)
+                delta = state_dict_sub(expert.state_dict(), base_model.state_dict())
+                deltas.append(delta)
+                del expert  # free memory
+        merged_delta = wudi_merging(
+            deltas,
+            accelerator=self.fabric.device,
+            iter_num=self.iter_num,
+            exclude_keys=self.exclude_keys,
+        )
+        merged_state = state_dict_add(base_model.state_dict(), merged_delta)
+        base_model.load_state_dict(merged_state)
+        return base_model

fusion_bench/mixins/__init__.py CHANGED Viewed

@@ -11,6 +11,7 @@ _import_structure = {
     "hydra_config": ["HydraConfigMixin"],
     "lightning_fabric": ["LightningFabricMixin"],
     "openclip_classification": ["OpenCLIPClassificationMixin"],
+    "pyinstrument": ["PyinstrumentProfilerMixin"],
     "serialization": [
         "BaseYAMLSerializable",
         "YAMLSerializationMixin",
@@ -25,6 +26,7 @@ if TYPE_CHECKING:
     from .hydra_config import HydraConfigMixin
     from .lightning_fabric import LightningFabricMixin
     from .openclip_classification import OpenCLIPClassificationMixin
+    from .pyinstrument import PyinstrumentProfilerMixin
     from .serialization import (
         BaseYAMLSerializable,
         YAMLSerializationMixin,

fusion_bench/mixins/lightning_fabric.py CHANGED Viewed

@@ -100,6 +100,10 @@ class LightningFabricMixin:
             self.setup_lightning_fabric(getattr(self, "config", DictConfig({})))
         return self._fabric_instance
+    @fabric.setter
+    def fabric(self, instance: L.Fabric):
+        """Store `instance` as the Fabric object returned by the `fabric` property."""
+        self._fabric_instance = instance
     @property
     def log_dir(self):
         """

fusion_bench/mixins/pyinstrument.py ADDED Viewed

@@ -0,0 +1,174 @@
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Generator, Optional, Union
+from lightning.fabric.utilities.rank_zero import rank_zero_only
+from pyinstrument import Profiler
+__all__ = ["PyinstrumentProfilerMixin"]
+class PyinstrumentProfilerMixin:
+    """
+    A mixin class that provides statistical profiling capabilities using pyinstrument.
+    This mixin allows for easy profiling of code blocks using a context manager.
+    It provides methods to start and stop profiling actions, save profiling results
+    to files, and print profiling summaries.
+    Note:
+        This mixin requires the `pyinstrument` package to be installed.
+        If not available, an ImportError will be raised when importing this module.
+        Profiling sections do not nest: `start_profile` does nothing while a
+        section is running, but `stop_profile` always stops it, so leaving an
+        inner `with self.profile(...)` also ends the enclosing section.
+    Examples:
+    ```python
+    class MyClass(PyinstrumentProfilerMixin):
+        def do_something(self):
+            with self.profile("work"):
+                # do some work here
+                ...
+            # save the profiling results
+            self.save_profile_report("profile_report.html")
+            # or print the summary
+            self.print_profile_summary()
+    ```
+    Attributes:
+        _profiler (Profiler): The pyinstrument Profiler, or None until first use.
+        _is_profiling (bool): True between `start_profile` and `stop_profile`.
+    """
+    _profiler: Optional[Profiler] = None
+    _is_profiling: bool = False
+    @property
+    def profiler(self) -> Optional[Profiler]:
+        """Get the profiler instance, creating it if necessary."""
+        if self._profiler is None:
+            self._profiler = Profiler()
+        return self._profiler
+    @contextmanager
+    def profile(self, action_name: Optional[str] = None) -> Generator:
+        """
+        Context manager for profiling a code block.
+        Args:
+            action_name: Optional name for the profiling action (for logging purposes).
+        Yields:
+            The `action_name` that was passed in.
+        Example:
+        ```python
+        with self.profile("expensive_operation"):
+            # do some expensive work here
+            expensive_function()
+        ```
+        """
+        try:
+            self.start_profile(action_name)
+            yield action_name
+        finally:
+            # Runs even when the profiled block raises.
+            self.stop_profile(action_name)
+    def start_profile(self, action_name: Optional[str] = None):
+        """
+        Start profiling. Does nothing if profiling is already running.
+        Args:
+            action_name: Optional name for the profiling action.
+        """
+        if self._is_profiling:
+            return
+        self.profiler.start()
+        self._is_profiling = True
+        if action_name:
+            print(f"Started profiling: {action_name}")
+    def stop_profile(self, action_name: Optional[str] = None):
+        """
+        Stop profiling. Does nothing if profiling is not running.
+        Args:
+            action_name: Optional name for the profiling action.
+        """
+        if not self._is_profiling:
+            return
+        self.profiler.stop()
+        self._is_profiling = False
+        if action_name:
+            print(f"Stopped profiling: {action_name}")
+    @rank_zero_only
+    def print_profile_summary(
+        self, title: Optional[str] = None, unicode: bool = True, color: bool = True
+    ):
+        """
+        Print a summary of the profiling results.
+        Prints "No profiling data available." when no profiler has been created yet.
+        Args:
+            title: Optional title to print before the summary.
+            unicode: Whether to use unicode characters in the output.
+            color: Whether to use color in the output.
+        """
+        # Check the backing field: the `profiler` property creates a profiler
+        # on access and therefore never returns None.
+        if self._profiler is None:
+            print("No profiling data available.")
+            return
+        if title is not None:
+            print(title)
+        print(self._profiler.output_text(unicode=unicode, color=color))
+    @rank_zero_only
+    def save_profile_report(
+        self,
+        output_path: Union[str, Path] = "profile_report.html",
+        format: str = "html",
+        title: Optional[str] = None,
+    ):
+        """
+        Save the profiling results to a file.
+        Prints "No profiling data available." and writes nothing when no profiler
+        has been created yet.
+        Args:
+            output_path: Path where to save the profiling report.
+            format: Output format ('html', or 'text'), case-insensitive.
+            title: Optional title for the report. Currently unused.
+        Raises:
+            ValueError: If `format` is neither 'html' nor 'text'.
+        """
+        # Check the backing field: the `profiler` property creates a profiler
+        # on access and therefore never returns None.
+        if self._profiler is None:
+            print("No profiling data available.")
+            return
+        output_path = Path(output_path)
+        output_path.parent.mkdir(parents=True, exist_ok=True)
+        report_format = format.lower()
+        if report_format == "html":
+            content = self._profiler.output_html()
+        elif report_format == "text":
+            content = self._profiler.output_text(unicode=True, color=False)
+        else:
+            raise ValueError(f"Unsupported format: {format}. Use 'html', or 'text'.")
+        with open(output_path, "w", encoding="utf-8") as f:
+            f.write(content)
+        print(f"Profile report saved to: {output_path}")
+    def reset_profile(self):
+        """Reset the profiler to start fresh, stopping it first if it is running."""
+        if self._is_profiling:
+            self.stop_profile()
+        self._profiler = None
+    def __del__(self):
+        """Cleanup when the object is destroyed."""
+        if self._is_profiling:
+            self.stop_profile()
+        if self._profiler is not None:
+            del self._profiler
+            self._profiler = None

fusion-bench 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl

fusion-bench 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl