PyPI - fusion-bench - Versions diffs - 0.2.22__tar.gz → 0.2.23__tar.gz - Mend

fusion-bench 0.2.22tar.gz → 0.2.23tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (951) hide show

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fusion_bench
-Version: 0.2.22
+Version: 0.2.23
 Summary: A Comprehensive Benchmark of Deep Model Fusion
 Author-email: Anke Tang <tang.anke@foxmail.com>
 Project-URL: Repository, https://github.com/tanganke/fusion_bench

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/__init__.py RENAMED Viewed

@@ -32,6 +32,10 @@ from .models import (
 from .programs import BaseHydraProgram
 from .taskpool import BaseTaskPool
 from .utils import (
+    BoolStateDictType,
+    LazyStateDict,
+    StateDictType,
+    TorchModelType,
     cache_with_joblib,
     get_rankzero_logger,
     import_object,

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/compat/method/__init__.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import warnings
+from typing import Any, List, Type
 from omegaconf import DictConfig
@@ -76,7 +77,9 @@ class AlgorithmFactory:
         return algorithm_cls(method_config)
     @staticmethod
-    def register_algorithm(name: str, algorithm_cls):
+    def register_algorithm(
+        name: str, algorithm_cls: Type[ModelFusionAlgorithm]
+    ) -> None:
         """
         Register a new algorithm with the factory.
@@ -87,7 +90,7 @@ class AlgorithmFactory:
         AlgorithmFactory._aglorithms[name] = algorithm_cls
     @classmethod
-    def available_algorithms(cls):
+    def available_algorithms(cls) -> List[str]:
         """
         Get a list of available algorithms.

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/compat/method/base_algorithm.py RENAMED Viewed

@@ -1,9 +1,10 @@
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Any, Optional
 from omegaconf import DictConfig
 if TYPE_CHECKING:
+    from fusion_bench import BaseModelPool
     from fusion_bench.programs.base_program import BaseHydraProgram
 __all__ = ["ModelFusionAlgorithm"]
@@ -51,7 +52,7 @@ class ModelFusionAlgorithm(ABC):
         pass
     @abstractmethod
-    def run(self, modelpool):
+    def run(self, modelpool: "BaseModelPool") -> Any:
         """
         Fuse the models in the given model pool.

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/compat/modelpool/base_pool.py RENAMED Viewed

@@ -42,7 +42,7 @@ class ModelPool(ABC):
             ), "Duplicate model names found in model pool"
             self._model_names = model_names
-    def __len__(self):
+    def __len__(self) -> int:
         """
         Return the number of models in the model pool, exclude special models such as `_pretrained_`.
@@ -66,7 +66,7 @@ class ModelPool(ABC):
         return names
     @property
-    def has_pretrained(self):
+    def has_pretrained(self) -> bool:
         """
         Check if the pretrained model is available in the model pool.
@@ -78,7 +78,7 @@ class ModelPool(ABC):
                 return True
         return False
-    def get_model_config(self, model_name: str):
+    def get_model_config(self, model_name: str) -> Dict:
         """
         Retrieves the configuration for a specific model from the model pool.

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/compat/taskpool/clip_image_classification.py RENAMED Viewed

@@ -169,7 +169,7 @@ class CLIPImageClassificationTaskPool(TaskPool):
             self._fabric = L.Fabric(devices=1)
             self._fabric.launch()
-        # CLIPVisionModel works the same with CLIPVisonTransformer, so we can use it directly
+        # CLIPVisionModel works the same with CLIPVisionTransformer, so we can use it directly
         self.clip_model.vision_model = model
         report = {}
         training_params, all_params = count_parameters(model)

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/dataset/gpt2_glue.py RENAMED Viewed

@@ -121,7 +121,7 @@ class TokenizedGLUE:
     def load_dataset(
         self, name: Literal["mrpc", "mnli", "cola", "sst2", "qnli", "qqp", "rte"]
-    ):
+    ) -> Dataset:
         """
         Load and tokenize a GLUE dataset.

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/method/__init__.py RENAMED Viewed

@@ -30,7 +30,7 @@ _import_structure = {
         "TaskArithmeticForLlama",
         "LinearInterpolationAlgorithm",
     ],
-    "slerp": ["SlerpMergeAlgorithm"],
+    "slerp": ["SlerpMergeAlgorithm", "SlerpForCausalLM"],
     "simple_average": ["SimpleAverageAlgorithm"],
     "weighted_average": ["WeightedAverageAlgorithm", "WeightedAverageForLLama"],
     "task_arithmetic": ["TaskArithmeticAlgorithm"],
@@ -71,6 +71,7 @@ _import_structure = {
     ],
     "fw_merging": ["FrankWolfeHardAlgorithm", "FrankWolfeSoftAlgorithm"],
     "tall_mask": ["TallMaskTaskArithmeticAlgorithm"],
+    "model_stock": ["ModelStock"],
     # plug-and-play model merging methods
     "concrete_subspace": [
         "ConcreteTaskArithmeticAlgorithmForCLIP",
@@ -194,6 +195,7 @@ if TYPE_CHECKING:
         MixtralUpscalingAlgorithm,
     )
     from .model_recombination import ModelRecombinationAlgorithm
+    from .model_stock import ModelStock
     from .opcm import OPCMForCLIP
     from .pruning import (
         MagnitudeDiffPruningAlgorithm,
@@ -213,7 +215,7 @@ if TYPE_CHECKING:
         RegMeanAlgorithmPlusPlus,
     )
     from .simple_average import SimpleAverageAlgorithm
-    from .slerp import SlerpMergeAlgorithm
+    from .slerp import SlerpForCausalLM, SlerpMergeAlgorithm
     from .smile_upscaling import (
         SingularProjectionMergingAlgorithm,
         SmileUpscalingAlgorithm,

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/method/analysis/task_vector_cos_similarity.py RENAMED Viewed

@@ -11,7 +11,7 @@ from torch import nn
 from tqdm.auto import tqdm
 from fusion_bench.method import BaseAlgorithm
-from fusion_bench.mixins import LightningFabricMixin
+from fusion_bench.mixins import LightningFabricMixin, auto_register_config
 from fusion_bench.modelpool import BaseModelPool
 from fusion_bench.utils.parameters import (
     StateDictType,
@@ -23,14 +23,50 @@ from fusion_bench.utils.state_dict_arithmetic import state_dict_sub
 log = logging.getLogger(__name__)
-class TaskVectorCosSimilarity(BaseAlgorithm, LightningFabricMixin):
+@auto_register_config
+class TaskVectorCosSimilarity(
+    LightningFabricMixin,
+    BaseAlgorithm,
+):
     """
-    This class is similar to the Dummy algorithm,
-    but it also print (or save) the cosine similarity matrix between the task vectors of the models in the model pool.
+    Computes and analyzes cosine similarity between task vectors of models in a model pool.
+    This algorithm extracts task vectors from fine-tuned models by computing the difference
+    between their parameters and a pretrained base model. It then calculates the pairwise
+    cosine similarity between all task vectors to understand the relationships and overlap
+    between different tasks.
+    The task vector for a model is defined as:
+        task_vector = finetuned_model_params - pretrained_model_params
+    Args:
+        plot_heatmap (bool): Whether to generate and save a heatmap visualization
+        trainable_only (bool, optional): If True, only consider trainable parameters
+            when computing task vectors. Defaults to True.
+        max_points_per_model (int, optional): Maximum number of parameters to sample
+            per model for memory efficiency. If None, uses all parameters.
+        output_path (str, optional): Directory to save outputs. If None, uses the
+            fabric logger directory.
+    Outputs:
+        - task_vector_cos_similarity.csv: Pairwise cosine similarity matrix
+        - task_vector_cos_similarity.pdf: Heatmap visualization (if plot_heatmap=True)
+    Returns:
+        The pretrained model from the model pool.
+    Example:
+        ```python
+        >>> algorithm = TaskVectorCosSimilarity(
+        ...     plot_heatmap=True,
+        ...     trainable_only=True,
+        ...     output_path="/path/to/outputs"
+        ... )
+        >>> result = algorithm.run(modelpool)
+        ```
     """
     _config_mapping = BaseAlgorithm._config_mapping | {
-        "plot_heatmap": "plot_heatmap",
         "_output_path": "output_path",
     }
@@ -42,11 +78,8 @@ class TaskVectorCosSimilarity(BaseAlgorithm, LightningFabricMixin):
         output_path: Optional[str] = None,
         **kwargs,
     ):
-        self.plot_heatmap = plot_heatmap
-        self.trainable_only = trainable_only
-        self.max_points_per_model = max_points_per_model
-        self._output_path = output_path
         super().__init__(**kwargs)
+        self._output_path = output_path
     @property
     def output_path(self):
@@ -57,6 +90,22 @@ class TaskVectorCosSimilarity(BaseAlgorithm, LightningFabricMixin):
     @torch.no_grad()
     def run(self, modelpool: BaseModelPool):
+        """
+        Execute the task vector cosine similarity analysis.
+        This method:
+        1. Loads the pretrained base model from the model pool
+        2. Computes task vectors for each fine-tuned model
+        3. Calculates pairwise cosine similarities between all task vectors
+        4. Saves the similarity matrix as a CSV file
+        5. Optionally generates and saves a heatmap visualization
+        Args:
+            modelpool (BaseModelPool): Pool containing pretrained and fine-tuned models
+        Returns:
+            nn.Module: The pretrained model from the model pool
+        """
         pretrained_model = modelpool.load_pretrained_model()
         task_vectors = []
@@ -103,11 +152,14 @@ class TaskVectorCosSimilarity(BaseAlgorithm, LightningFabricMixin):
     def _plot_heatmap(self, data: pd.DataFrame):
         """
-        This function plots a heatmap of the provided data using seaborn.
+        Generate and save a heatmap visualization of the cosine similarity matrix.
+        Creates a color-coded heatmap showing pairwise cosine similarities between
+        task vectors. The heatmap is saved as a PDF file in the output directory.
         Args:
-            data (pd.DataFrame): A pandas DataFrame containing the data to be plotted.
-            figsize (tuple): A tuple specifying the size of the figure. Default is (4, 3).
+            data (pd.DataFrame): Symmetric matrix of cosine similarities between
+                task vectors, with model names as both index and columns.
         Returns:
             None
@@ -141,6 +193,26 @@ class TaskVectorCosSimilarity(BaseAlgorithm, LightningFabricMixin):
     def get_task_vector(
         self, pretrained_model: nn.Module, finetuned_model: nn.Module
     ) -> torch.Tensor:
+        """
+        Compute the task vector for a fine-tuned model.
+        The task vector represents the parameter changes from pretraining to
+        fine-tuning and is computed as:
+            task_vector = finetuned_params - pretrained_params
+        Args:
+            pretrained_model (nn.Module): The base pretrained model
+            finetuned_model (nn.Module): The fine-tuned model for a specific task
+        Returns:
+            torch.Tensor: Flattened task vector containing parameter differences.
+                If max_points_per_model is set, the vector may be downsampled.
+        Note:
+            - Converts parameters to float64 for numerical precision
+            - Supports optional downsampling for memory efficiency
+            - Uses only trainable parameters if trainable_only=True
+        """
         task_vector = state_dict_sub(
             self.get_state_dict(finetuned_model),
             self.get_state_dict(pretrained_model),
@@ -166,6 +238,17 @@ class TaskVectorCosSimilarity(BaseAlgorithm, LightningFabricMixin):
         return task_vector
     def get_state_dict(self, model: nn.Module):
+        """
+        Extract the state dictionary from a model.
+        Args:
+            model (nn.Module): The model to extract parameters from
+        Returns:
+            Dict[str, torch.Tensor]: State dictionary containing model parameters.
+                Returns only trainable parameters if trainable_only=True,
+                otherwise returns all parameters.
+        """
         if self.trainable_only:
             return trainable_state_dict(model)
         else:

fusion_bench-0.2.23/fusion_bench/method/analysis/task_vector_violin_plot.py ADDED Viewed

@@ -0,0 +1,313 @@
+import logging
+import os
+from typing import Dict, List, Optional, cast
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+import torch
+from numpy.typing import NDArray
+from torch import nn
+from tqdm.auto import tqdm
+from fusion_bench import BaseAlgorithm, BaseModelPool, StateDictType, timeit_context
+from fusion_bench.mixins import (
+    LightningFabricMixin,
+    SimpleProfilerMixin,
+    auto_register_config,
+)
+from fusion_bench.utils import state_dict_to_vector, trainable_state_dict
+from fusion_bench.utils.state_dict_arithmetic import state_dict_sub
+log = logging.getLogger(__name__)
+@auto_register_config
+class TaskVectorViolinPlot(
+    LightningFabricMixin,
+    SimpleProfilerMixin,
+    BaseAlgorithm,
+):
+    """
+    Creates violin plots to visualize the distribution of task vector values across models.
+    This class implements the task vector visualization technique described in:
+    "Efficient and Effective Weight-Ensembling Mixture of Experts for Multi-Task Model Merging"
+    by L. Shen, A. Tang, E. Yang et al. (https://arxiv.org/abs/2410.21804)
+    Task vectors represent the parameter differences between fine-tuned models and their
+    pretrained base model, computed as:
+        task_vector = finetuned_params - pretrained_params
+    The algorithm generates two types of violin plots:
+    1. Distribution of raw task vector values (positive and negative)
+    2. Distribution of absolute task vector values
+    Args:
+        trainable_only (bool): If True, only consider trainable parameters when computing
+            task vectors. If False, use all parameters.
+        max_points_per_model (int, optional): Maximum number of parameters to sample
+            per model for memory efficiency. If None or 0, uses all parameters.
+            Defaults to 1000.
+        fig_kwawrgs (dict, optional): Dictionary of keyword arguments to pass to
+            matplotlib.pyplot.subplots. NOTE: the keyword accepted by ``__init__``
+            is spelled ``fig_kwawrgs`` (sic), not ``fig_kwargs``. Common options include:
+            - figsize: Tuple of (width, height) in inches
+            - dpi: Dots per inch for resolution
+            - facecolor: Figure background color
+            Defaults to None.
+        output_path (str, optional): Directory to save the violin plots. If None,
+            uses the fabric logger's log directory. Defaults to None.
+    Outputs:
+        - task_vector_violin.pdf: Violin plot of raw task vector value distributions
+        - task_vector_violin_abs.pdf: Violin plot of absolute task vector value distributions
+    Returns:
+        The pretrained model from the model pool.
+    Example:
+        ```python
+        plotter = TaskVectorViolinPlot(
+            trainable_only=True,
+            max_points_per_model=5000,
+            # spelled as in the ``__init__`` signature; ``fig_kwargs=...`` would
+            # instead fall into ``**kwargs`` and be forwarded to the parent classes
+            fig_kwawrgs={'figsize': (12, 8), 'dpi': 300},
+            output_path='./analysis_plots'
+        )
+        pretrained_model = plotter.run(modelpool)
+        ```
+    Note:
+        This visualization is particularly useful for understanding:
+        - How different tasks affect model parameters
+        - The magnitude and distribution of parameter changes
+        - Similarities and differences between task adaptations
+    """
+    # Maps instance attribute names to configuration keys: extends the mapping
+    # inherited from BaseAlgorithm so that `_output_path` is stored under the
+    # `output_path` key.
+    _config_mapping = BaseAlgorithm._config_mapping | {
+        "_output_path": "output_path",
+    }
+    def __init__(
+        self,
+        trainable_only: bool,
+        max_points_per_model: Optional[int] = 1000,
+        fig_kwawrgs: Optional[Dict] = None,
+        output_path: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        Initialize the TaskVectorViolinPlot analyzer.
+        Args:
+            trainable_only (bool): Whether to consider only trainable parameters when
+                computing task vectors. Set to True to focus on learnable parameters,
+                False to include all parameters including frozen ones.
+            max_points_per_model (int, optional): Maximum number of parameter values
+                to sample per model for visualization. Set to None or 0 to use all
+                parameters. Defaults to 1000.
+            fig_kwawrgs (dict, optional): Keyword arguments passed to matplotlib's
+                subplots function for plot customization. Examples:
+                - {'figsize': (10, 6)} for plot dimensions
+                - {'dpi': 300} for high resolution
+                - {'facecolor': 'white'} for background color
+                Defaults to None (uses matplotlib defaults). The misspelled keyword
+                name is kept for backward compatibility; the value is exposed on the
+                instance as ``self.fig_kwargs``.
+            output_path (str, optional): Directory path where violin plots will be saved.
+                If None, uses the fabric logger's log directory. Defaults to None.
+            **kwargs: Additional keyword arguments passed to parent classes.
+        Note:
+            ``trainable_only`` and ``max_points_per_model`` are not assigned here;
+            they are presumably set on the instance by ``@auto_register_config``
+            under their parameter names (TODO confirm against the decorator).
+        """
+        super().__init__(**kwargs)
+        # `run` reads `self.fig_kwargs`, but the constructor keyword is spelled
+        # `fig_kwawrgs`, so nothing keyed on the parameter name can create that
+        # attribute. Assign it explicitly to avoid an AttributeError in `run`.
+        self.fig_kwargs = fig_kwawrgs
+        self._output_path = output_path
+    @property
+    def output_path(self):
+        """
+        Directory where the violin plots are written.
+        Returns the path given at construction time, or the fabric logger's
+        ``log_dir`` when no path was given.
+        """
+        if self._output_path is not None:
+            return self._output_path
+        return self.fabric.logger.log_dir
+    def run(self, modelpool: BaseModelPool):
+        """
+        Compute task vectors for every model in the pool and save two violin plots.
+        Steps:
+        1. Load the pretrained base model from the model pool.
+        2. Compute one task vector per model yielded by ``modelpool.named_models()``.
+        3. Draw a violin plot of the raw values and save it as
+           ``task_vector_violin.pdf``.
+        4. Draw a violin plot of the absolute values and save it as
+           ``task_vector_violin_abs.pdf``.
+        Args:
+            modelpool (BaseModelPool): Pool containing both a pretrained model and
+                fine-tuned models. Must have `has_pretrained=True`.
+        Returns:
+            The pretrained model loaded from the model pool.
+        Raises:
+            AssertionError: If the model pool doesn't contain a pretrained model.
+        Side Effects:
+            - Creates the output directory if it doesn't exist
+            - Saves the two PDF files listed above into the output directory
+            - Prints one progress line per model while computing task vectors
+        """
+        assert modelpool.has_pretrained
+        pretrained_model = modelpool.load_pretrained_model()
+        # Task vectors are computed without autograd bookkeeping.
+        with torch.no_grad(), timeit_context("Computing task vectors"):
+            task_vectors: Dict[str, NDArray] = {}
+            model_iter = tqdm(modelpool.named_models(), total=len(modelpool))
+            for name, finetuned_model in model_iter:
+                print(f"computing task vectors for {name}")
+                task_vectors[name] = self.get_task_vector(
+                    pretrained_model, finetuned_model
+                )
+        # One entry per figure:
+        # (value transform, timer label, y-axis label, title, output file name)
+        plot_specs = [
+            (
+                None,
+                "ploting",
+                "Task Vector Values",
+                "Distribution of Task Vector Values",
+                "task_vector_violin.pdf",
+            ),
+            (
+                np.abs,
+                "ploting abs value plot",
+                "The Absolute Values",
+                "Distribution of Task Vector Absolute Values",
+                "task_vector_violin_abs.pdf",
+            ),
+        ]
+        for transform, timer_label, y_label, title, file_name in plot_specs:
+            subplot_kwargs = {} if self.fig_kwargs is None else self.fig_kwargs
+            fig, ax = plt.subplots(1, 1, **subplot_kwargs)
+            fig = cast(plt.Figure, fig)
+            ax = cast(plt.Axes, ax)
+            # Prepare data for plotting
+            if transform is None:
+                data = list(task_vectors.values())
+            else:
+                data = [transform(values) for values in task_vectors.values()]
+            labels = list(task_vectors.keys())
+            # Create violin plot using seaborn
+            with timeit_context(timer_label):
+                sns.violinplot(data=data, ax=ax)
+            # Customize plot
+            ax.set_xticklabels(labels, rotation=45, ha="right")
+            ax.set_ylabel(y_label)
+            ax.set_title(title)
+            # Adjust layout to prevent label cutoff, then save and release the figure
+            plt.tight_layout()
+            os.makedirs(self.output_path, exist_ok=True)
+            output_file = f"{self.output_path}/{file_name}"
+            plt.savefig(output_file, bbox_inches="tight")
+            plt.close(fig)
+        return pretrained_model
+    def get_task_vector(self, pretrained_model, finetuned_model):
+        """
+        Return the flattened parameter difference between two models as a numpy array.
+        The task vector is computed as:
+            task_vector = finetuned_params - pretrained_params
+        Args:
+            pretrained_model (nn.Module): The base pretrained model
+            finetuned_model (nn.Module): The fine-tuned model for a specific task
+        Returns:
+            np.ndarray: 1D float32 array of parameter differences. When
+                ``max_points_per_model`` is a positive number smaller than the
+                vector length, a random subset of that many entries (sampled
+                without replacement) is returned instead.
+        Note:
+            - Which parameters are included is decided by ``get_state_dict``
+            - Sampling uses ``np.random.choice`` and is therefore not seeded here
+        """
+        finetuned_state = self.get_state_dict(finetuned_model)
+        pretrained_state = self.get_state_dict(pretrained_model)
+        delta = state_dict_sub(finetuned_state, pretrained_state)
+        flat = state_dict_to_vector(delta)
+        flat = flat.cpu().float().numpy()
+        # No downsampling when the cap is unset, non-positive, or not exceeded.
+        cap = self.max_points_per_model
+        if cap is None or cap <= 0 or flat.shape[0] <= cap:
+            return flat
+        log.info(
+            f"Downsampling task vectors to {self.max_points_per_model} points."
+        )
+        picked = np.random.choice(flat.shape[0], cap, replace=False)
+        return flat[picked].copy()
+    def get_state_dict(self, model: nn.Module):
+        """
+        Return the state dictionary used for task vector computation.
+        Args:
+            model (nn.Module): The PyTorch model to extract parameters from
+        Returns:
+            The result of ``trainable_state_dict(model)`` when
+            ``self.trainable_only`` is truthy, otherwise ``model.state_dict()``.
+        """
+        return (
+            trainable_state_dict(model)
+            if self.trainable_only
+            else model.state_dict()
+        )

{fusion_bench-0.2.22 → fusion_bench-0.2.23}/fusion_bench/method/bitdelta/bitdelta.py RENAMED Viewed

@@ -6,7 +6,11 @@ import torch.nn.functional as F
 from tqdm.auto import tqdm
 from fusion_bench import BaseAlgorithm, BaseModelPool
-from fusion_bench.mixins import LightningFabricMixin, SimpleProfilerMixin
+from fusion_bench.mixins import (
+    LightningFabricMixin,
+    SimpleProfilerMixin,
+    auto_register_config,
+)
 from fusion_bench.modelpool import CausalLMPool
 from .bitdelta_utils.data import get_dataloader, get_dataset
@@ -15,23 +19,12 @@ from .bitdelta_utils.diff import compress_diff, save_diff, save_full_model
 log = logging.getLogger(__name__)
+@auto_register_config
 class BitDeltaAlgorithm(
-    BaseAlgorithm,
     LightningFabricMixin,
     SimpleProfilerMixin,
+    BaseAlgorithm,
 ):
-    _config_mapping = BaseAlgorithm._config_mapping | {
-        "save_dir": "save_dir",
-        "save_full_model": "save_full_model",
-        "lr": "lr",
-        "batch_size": "batch_size",
-        "num_steps": "num_steps",
-        "dataset_name": "dataset_name",
-        "subset": "subset",
-        "split": "split",
-        "max_length": "max_length",
-    }
     def __init__(
         self,
         save_dir: str,
@@ -46,15 +39,6 @@ class BitDeltaAlgorithm(
         **kwargs,
     ):
         super().__init__(**kwargs)
-        self.save_dir = save_dir
-        self.save_full_model = save_full_model
-        self.lr = lr
-        self.batch_size = batch_size
-        self.num_steps = num_steps
-        self.dataset_name = dataset_name
-        self.subset = subset
-        self.split = split
-        self.max_length = max_length
     def run(self, modelpool: CausalLMPool):
         if self.save_dir is None:

fusion-bench 0.2.22__tar.gz → 0.2.23__tar.gz

fusion-bench 0.2.22tar.gz → 0.2.23tar.gz